summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- basetypes.py 49
-rw-r--r-- sms.py 105
-rw-r--r-- sms/analysis.c 17
-rw-r--r-- sms/sineSynth.c 40
-rw-r--r-- sms/sms.c 72
-rw-r--r-- sms/sms.h 19
-rw-r--r-- sms/sms.i 21
-rw-r--r-- sms/spectrum.c 59
-rw-r--r-- sms/synthesis.c 113
-rw-r--r-- tests/sms.py 111
10 files changed, 402 insertions, 204 deletions
diff --git a/basetypes.py b/basetypes.py
index bcad0a0..c12091d 100644
--- a/basetypes.py
+++ b/basetypes.py
@@ -353,6 +353,23 @@ class Residual(object):
self._hop_size = 512
self._frame_size = 512
+ frame_size = property(lambda self: self.get_frame_size(),
+ lambda self, x: self.set_frame_size(x))
+ hop_size = property(lambda self: self.get_hop_size(),
+ lambda self, x: self.set_hop_size(x))
+
+ def get_frame_size(self):
+ return self._frame_size
+
+ def set_frame_size(self, frame_size):
+ self._frame_size = frame_size
+
+ def get_hop_size(self):
+ return self._hop_size
+
+ def set_hop_size(self, hop_size):
+ self._hop_size = hop_size
+
def residual_frame(self, synth, original):
"Computes the residual signal for a frame of audio"
raise Exception("NotYetImplemented")
@@ -360,21 +377,21 @@ class Residual(object):
def find_residual(self, synth, original):
"Calculate and return the residual signal"
# pad the signals if necessary
- if len(synth) % self._hop_size != 0:
- synth = np.hstack((synth, np.zeros(self._hop_size - (len(synth) % self._hop_size))))
- if len(original) % self._hop_size != 0:
- original = np.hstack((original, np.zeros(self._hop_size - (len(original) % self._hop_size))))
+ if len(synth) % self.hop_size != 0:
+ synth = np.hstack((synth, np.zeros(self.hop_size - (len(synth) % self.hop_size))))
+ if len(original) % self.hop_size != 0:
+ original = np.hstack((original, np.zeros(self.hop_size - (len(original) % self.hop_size))))
- num_frames = len(original) / self._hop_size
+ num_frames = len(original) / self.hop_size
residual = simpl.array([])
sample_offset = 0
for i in range(num_frames):
- synth_frame = synth[sample_offset:sample_offset+self._hop_size]
- original_frame = original[sample_offset:sample_offset+self._hop_size]
+ synth_frame = synth[sample_offset:sample_offset+self.hop_size]
+ original_frame = original[sample_offset:sample_offset+self.hop_size]
residual = np.hstack((residual,
self.residual_frame(synth_frame, original_frame)))
- sample_offset += self._hop_size
+ sample_offset += self.hop_size
return residual
def synth_frame(self, synth, original):
@@ -384,20 +401,20 @@ class Residual(object):
def synth(self, synth, original):
"Calculate and return a synthesised residual signal"
# pad the signals if necessary
- if len(synth) % self._hop_size != 0:
- synth = np.hstack((synth, np.zeros(self._hop_size - (len(synth) % self._hop_size))))
- if len(original) % self._hop_size != 0:
- original = np.hstack((original, np.zeros(self._hop_size - (len(original) % self._hop_size))))
+ if len(synth) % self.hop_size != 0:
+ synth = np.hstack((synth, np.zeros(self.hop_size - (len(synth) % self.hop_size))))
+ if len(original) % self.hop_size != 0:
+ original = np.hstack((original, np.zeros(self.hop_size - (len(original) % self.hop_size))))
- num_frames = len(original) / self._hop_size
+ num_frames = len(original) / self.hop_size
residual = simpl.array([])
sample_offset = 0
for i in range(num_frames):
- synth_frame = synth[sample_offset:sample_offset+self._hop_size]
- original_frame = original[sample_offset:sample_offset+self._hop_size]
+ synth_frame = synth[sample_offset:sample_offset+self.hop_size]
+ original_frame = original[sample_offset:sample_offset+self.hop_size]
residual = np.hstack((residual,
self.synth_frame(synth_frame, original_frame)))
- sample_offset += self._hop_size
+ sample_offset += self.hop_size
return residual
diff --git a/sms.py b/sms.py
index 625c933..7f205ed 100644
--- a/sms.py
+++ b/sms.py
@@ -39,6 +39,7 @@ class SMSPeakDetection(simpl.PeakDetection):
self._analysis_params.nTracks = self._max_peaks
self._analysis_params.maxPeaks = self._max_peaks
self._analysis_params.nGuides = self._max_peaks
+ self._analysis_params.preEmphasis = 0
if simplsms.sms_initAnalysis(self._analysis_params) != 0:
raise Exception("Error allocating memory for analysis_params")
self._peaks = simplsms.SMS_SpectralPeaks(self._max_peaks)
@@ -199,7 +200,10 @@ class SMSPeakDetection(simpl.PeakDetection):
self._analysis_params.iSizeSound = len(audio)
self.frames = []
pos = 0
- while pos < len(audio):
+ # account for SMS analysis delay
+ # need an extra (max_frame_delay - 1) frames
+ num_samples = (len(audio) - self.hop_size) + ((self.max_frame_delay -1) * self.hop_size)
+ while pos < num_samples:
# get the next frame size
if not self._static_frame_size:
self.frame_size = self.get_next_frame_size()
@@ -231,6 +235,7 @@ class SMSPartialTracking(simpl.PartialTracking):
self._analysis_params.iFormat = simplsms.SMS_FORMAT_HP
self._analysis_params.nTracks = self._max_partials
self._analysis_params.nGuides = self._max_partials
+ self._analysis_params.preEmphasis = 0
if simplsms.sms_initAnalysis(self._analysis_params) != 0:
raise Exception("Error allocating memory for analysis_params")
self._sms_header = simplsms.SMS_Header()
@@ -243,6 +248,36 @@ class SMSPartialTracking(simpl.PartialTracking):
simplsms.sms_freeFrame(self._analysis_frame)
simplsms.sms_free()
+ # properties
+ # TODO: make properties for the remaining analysis parameters
+ max_frequency = property(lambda self: self.get_max_frequency(),
+ lambda self, x: self.set_max_frequency(x))
+ default_fundamental = property(lambda self: self.get_default_fundamental(),
+ lambda self, x: self.set_default_fundamental(x))
+ max_frame_delay = property(lambda self: self.get_max_frame_delay(),
+ lambda self, x: self.set_max_frame_delay(x))
+
+ def get_max_frequency(self):
+ return self._analysis_params.fHighestFreq
+
+ def set_max_frequency(self, max_frequency):
+ self._analysis_params.fHighestFreq = max_frequency
+
+ def get_default_fundamental(self):
+ return self._analysis_params.fDefaultFundamental
+
+ def set_default_fundamental(self, default_fundamental):
+ self._analysis_params.fDefaultFundamental = default_fundamental
+
+ def get_max_frame_delay(self):
+ return self._analysis_params.iMaxDelayFrames
+
+ def set_max_frame_delay(self, max_frame_delay):
+ simplsms.sms_freeAnalysis(self._analysis_params)
+ self._analysis_params.iMaxDelayFrames = max_frame_delay
+ if simplsms.sms_initAnalysis(self._analysis_params) != 0:
+ raise Exception("Error allocating memory for analysis_params")
+
def get_max_partials(self):
return self._analysis_params.nTracks
@@ -290,6 +325,17 @@ class SMSPartialTracking(simpl.PartialTracking):
peaks.append(p)
return peaks
+ def find_partials(self, frames):
+ """Find partials from the sinusoidal peaks in a list of Frames"""
+ self.frames = []
+ for frame in frames:
+ frame.partials = self.update_partials(frame)
+ self.frames.append(frame)
+ # account for SMS analysis delay
+ # the first extra (max_frame_delay) frames are blank
+ if len(self.frames) > (self.max_frame_delay):
+ self.frames = self.frames[self.max_frame_delay:]
+ return self.frames
class SMSSynthesis(simpl.Synthesis):
"Sinusoidal resynthesis using SMS"
@@ -299,13 +345,15 @@ class SMSSynthesis(simpl.Synthesis):
simplsms.sms_init()
self._synth_params = simplsms.SMS_SynthParams()
simplsms.sms_initSynthParams(self._synth_params)
- self._synth_params.iDetSynthType = simplsms.SMS_DET_IFFT
+ self._synth_params.iSamplingRate = self._sampling_rate
+ self._synth_params.iDetSynthType = simplsms.SMS_DET_SIN
self._synth_params.iSynthesisType = simplsms.SMS_STYPE_DET
self._synth_params.iStochasticType = simplsms.SMS_STOC_NONE
- # use the default simpl hop size instead of the default SMS hop size
self._synth_params.sizeHop = self._hop_size
+ self._synth_params.nTracks = self._max_partials
+ self._synth_params.deEmphasis = 0
simplsms.sms_initSynth(self._synth_params)
- self._current_frame = simpl.zeros(self.hop_size)
+ self._current_frame = simpl.zeros(self._hop_size)
self._analysis_frame = simplsms.SMS_Data()
simplsms.sms_allocFrame(self._analysis_frame, self.max_partials,
self.num_stochastic_coeffs, 1, self.stochastic_type, 0)
@@ -336,7 +384,7 @@ class SMSSynthesis(simpl.Synthesis):
simplsms.sms_freeSynth(self._synth_params)
self._synth_params.sizeHop = hop_size
simplsms.sms_initSynth(self._synth_params)
- self._current_frame = simpl.zeros(self.hop_size)
+ self._current_frame = simpl.zeros(hop_size)
def get_max_partials(self):
return self._synth_params.nTracks
@@ -365,7 +413,7 @@ class SMSSynthesis(simpl.Synthesis):
return self._synth_params.iDetSynthesisType
def set_det_synthesis_type(self, det_synthesis_type):
- self._synth_params.iDetSynthesisType = det_synthesis_type
+ self._synth_params.iDetSynthType = det_synthesis_type
def get_num_stochastic_coeffs(self):
return self._synth_params.nStochasticCoeff
@@ -404,7 +452,8 @@ class SMSSynthesis(simpl.Synthesis):
amps = simpl.zeros(self.max_partials)
freqs = simpl.zeros(self.max_partials)
phases = simpl.zeros(self.max_partials)
- for i in range(len(frame.partials)):
+ num_partials = min(self.max_partials, len(frame.partials))
+ for i in range(num_partials):
amps[i] = frame.partials[i].amplitude
freqs[i] = frame.partials[i].frequency
phases[i] = frame.partials[i].phase
@@ -423,25 +472,53 @@ class SMSResidual(simpl.Residual):
simplsms.sms_init()
self._residual_params = simplsms.SMS_ResidualParams()
simplsms.sms_initResidualParams(self._residual_params)
- self._residual_params.residualSize = self._hop_size# * 2
+ self._residual_params.hopSize = self._hop_size
simplsms.sms_initResidual(self._residual_params)
def __del__(self):
simplsms.sms_freeResidual(self._residual_params)
simplsms.sms_free()
+
+ def get_hop_size(self):
+ return self._residual_params.hopSize
+
+ def set_hop_size(self, hop_size):
+ simplsms.sms_freeResidual(self._residual_params)
+ self._residual_params.hopSize = hop_size
+ simplsms.sms_initResidual(self._residual_params)
def residual_frame(self, synth, original):
"Computes the residual signal for a frame of audio"
simplsms.sms_findResidual(synth, original, self._residual_params)
- residual = simpl.zeros(self._residual_params.residualSize)
+ residual = simpl.zeros(self._residual_params.hopSize)
self._residual_params.getResidual(residual)
return residual
+ def find_residual(self, synth, original):
+ "Calculate and return the residual signal"
+ import numpy as np
+ # pad the signals if necessary
+ if len(synth) % self.hop_size != 0:
+ synth = np.hstack((synth, np.zeros(self.hop_size - (len(synth) % self.hop_size))))
+ if len(original) % self.hop_size != 0:
+ original = np.hstack((original, np.zeros(self.hop_size - (len(original) % self.hop_size))))
+
+ num_frames = len(original) / self.hop_size
+ residual = simpl.array([])
+ sample_offset = 0
+
+ for i in range(num_frames):
+ synth_frame = synth[sample_offset:sample_offset+self.hop_size]
+ original_frame = original[sample_offset:sample_offset+self.hop_size]
+ residual = np.hstack((residual,
+ self.residual_frame(synth_frame, original_frame)))
+ sample_offset += self.hop_size
+ return residual
+
def synth_frame(self, synth, original):
"Calculate and return one frame of the synthesised residual signal"
- self.residual_frame(synth, original)
- simplsms.sms_approxResidual(self._residual_params)
- residual_approx = simpl.zeros(self._residual_params.residualSize)
- self._residual_params.getApprox(residual_approx)
- return residual_approx
+ residual = self.residual_frame(synth, original)
+ approx = simpl.zeros(self._residual_params.hopSize)
+ simplsms.sms_approxResidual(residual, approx, self._residual_params)
+ return approx
diff --git a/sms/analysis.c b/sms/analysis.c
index ad8420e..89d4b4a 100644
--- a/sms/analysis.c
+++ b/sms/analysis.c
@@ -210,7 +210,6 @@ int sms_findPeaks(int sizeWaveform, sfloat *pWaveform, SMS_AnalParams *pAnalPara
pSpectralPeaks->pSpectralPeaks[i].fPhase = 0.0;
}
}
- /*printf("\n");*/
return pSpectralPeaks->nPeaks;
}
else
@@ -328,13 +327,16 @@ int sms_findResidual(int sizeSynthesis, sfloat* pSynthesis,
int sizeOriginal, sfloat* pOriginal,
SMS_ResidualParams *residualParams)
{
- if(residualParams->residualSize < sizeOriginal)
+ if(residualParams->hopSize < sizeOriginal)
{
sms_error("Residual signal length is smaller than the original signal length");
return -1;
}
- sms_residual(residualParams->residualSize, pSynthesis, pOriginal, residualParams);
+ sms_residual(residualParams->hopSize, pSynthesis, pOriginal, residualParams);
+ sms_filterHighPass(residualParams->hopSize,
+ residualParams->residual,
+ residualParams->samplingRate);
return 0;
}
@@ -467,8 +469,9 @@ int sms_analyze(int sizeWaveform, sfloat *pWaveform, SMS_Data *pSmsData, SMS_Ana
else if(sizeData < pAnalParams->residualParams.residualSize)
{
/* should only happen if we're at the end of a sound, unless hop size changes */
- sms_getWindow(sizeData, pAnalParams->residualParams.residualWindow, SMS_WIN_HAMMING);
- sms_scaleWindow(sizeData, pAnalParams->residualParams.residualWindow);
+ /* TODO: should the window type be set to pAnalParams->iWindowType? */
+ sms_getWindow(sizeData, pAnalParams->residualParams.fftWindow, SMS_WIN_HAMMING);
+ sms_scaleWindow(sizeData, pAnalParams->residualParams.fftWindow);
}
/* obtain residual sound from original and synthesized sounds. accumulate the residual percentage.*/
@@ -483,13 +486,13 @@ int sms_analyze(int sizeWaveform, sfloat *pWaveform, SMS_Data *pSmsData, SMS_Ana
sms_filterHighPass(sizeData, pAnalParams->residualParams.residual, pAnalParams->iSamplingRate);
/* approximate residual */
- sms_stocAnalysis(sizeData, pAnalParams->residualParams.residual, pAnalParams->residualParams.residualWindow,
+ sms_stocAnalysis(sizeData, pAnalParams->residualParams.residual, pAnalParams->residualParams.fftWindow,
pSmsData, pAnalParams);
}
else if(pAnalParams->iStochasticType == SMS_STOC_IFFT)
{
int sizeMag = sms_power2(sizeData >> 1);
- sms_spectrum(sizeData, pAnalParams->residualParams.residual, pAnalParams->residualParams.residualWindow,
+ sms_spectrum(sizeData, pAnalParams->residualParams.residual, pAnalParams->residualParams.fftWindow,
sizeMag, pSmsData->pFStocCoeff, pSmsData->pResPhase,
pAnalParams->fftBuffer);
}
diff --git a/sms/sineSynth.c b/sms/sineSynth.c
index c91bf65..9882062 100644
--- a/sms/sineSynth.c
+++ b/sms/sineSynth.c
@@ -45,18 +45,18 @@ static void SinePhaSynth(sfloat fFreq, sfloat fMag, sfloat fPhase,
sfloat fAlpha, fBeta, fTmp1, fTmp2;
/* if no mag in last frame copy freq from current and make phase */
- if (pLastFrame->pFSinAmp[iTrack] <= 0)
+ if(pLastFrame->pFSinAmp[iTrack] <= 0)
{
pLastFrame->pFSinFreq[iTrack] = fFreq;
fTmp = fPhase - (fFreq * sizeBuffer);
pLastFrame->pFSinPha[iTrack] = fTmp - floor(fTmp / TWO_PI) * TWO_PI;
}
/* and the other way */
- else if (fMag <= 0)
+ else if(fMag <= 0)
{
fFreq = pLastFrame->pFSinFreq[iTrack];
fTmp = pLastFrame->pFSinPha[iTrack] +
- (pLastFrame->pFSinFreq[iTrack] * sizeBuffer);
+ (pLastFrame->pFSinFreq[iTrack] * sizeBuffer);
fPhase = fTmp - floor(fTmp / TWO_PI) * TWO_PI;
}
@@ -67,27 +67,27 @@ static void SinePhaSynth(sfloat fFreq, sfloat fMag, sfloat fPhase,
/* create instantaneous phase from freq. and phase values */
fTmp1 = fFreq - pLastFrame->pFSinFreq[iTrack];
fTmp2 = ((pLastFrame->pFSinPha[iTrack] +
- pLastFrame->pFSinFreq[iTrack] * sizeBuffer - fPhase) +
- fTmp1 * sizeBuffer / 2.0) / TWO_PI;
- iM = (int) (fTmp2 + .5);
+ pLastFrame->pFSinFreq[iTrack] * sizeBuffer - fPhase) +
+ fTmp1 * sizeBuffer / 2.0) / TWO_PI;
+ iM = (int)(fTmp2 + .5);
fTmp2 = fPhase - pLastFrame->pFSinPha[iTrack] -
- pLastFrame->pFSinFreq[iTrack] * sizeBuffer +
- TWO_PI * iM;
+ pLastFrame->pFSinFreq[iTrack] * sizeBuffer + TWO_PI * iM;
fAlpha = (3.0 / (sfloat)(sizeBuffer * sizeBuffer)) *
- fTmp2 - fTmp1 / sizeBuffer;
+ fTmp2 - fTmp1 / sizeBuffer;
fBeta = (-2.0 / ((sfloat) (sizeBuffer * sizeBuffer * sizeBuffer))) *
- fTmp2 + fTmp1 / ((sfloat) (sizeBuffer * sizeBuffer));
+ fTmp2 + fTmp1 / ((sfloat) (sizeBuffer * sizeBuffer));
for(i=0; i<sizeBuffer; i++)
{
fInstMag += fMagIncr;
fInstPhase = pLastFrame->pFSinPha[iTrack] +
- pLastFrame->pFSinFreq[iTrack] * i +
- fAlpha * i * i + fBeta * i * i * i;
+ pLastFrame->pFSinFreq[iTrack] * i +
+ fAlpha * i * i + fBeta * i * i * i;
- /* pFWaveform[i] += sms_dBToMag(fInstMag) * sms_sine(fInstPhase + PI_2); */
+ /*pFWaveform[i] += sms_dBToMag(fInstMag) * sms_sine(fInstPhase + PI_2);*/
pFWaveform[i] += sms_dBToMag(fInstMag) * sinf(fInstPhase + PI_2);
}
+
/* save current values into buffer */
pLastFrame->pFSinFreq[iTrack] = fFreq;
pLastFrame->pFSinAmp[iTrack] = fMag;
@@ -110,14 +110,13 @@ static void SineSynth(sfloat fFreq, sfloat fMag, SMS_Data *pLastFrame,
int i;
/* if no mag in last frame copy freq from current */
- if (pLastFrame->pFSinAmp[iTrack] <= 0)
+ if(pLastFrame->pFSinAmp[iTrack] <= 0)
{
pLastFrame->pFSinFreq[iTrack] = fFreq;
- pLastFrame->pFSinPha[iTrack] =
- TWO_PI * sms_random();
+ pLastFrame->pFSinPha[iTrack] = TWO_PI * sms_random();
}
/* and the other way */
- else if (fMag <= 0)
+ else if(fMag <= 0)
fFreq = pLastFrame->pFSinFreq[iTrack];
/* calculate the instantaneous amplitude */
@@ -129,20 +128,18 @@ static void SineSynth(sfloat fFreq, sfloat fMag, SMS_Data *pLastFrame,
fInstPhase = pLastFrame->pFSinPha[iTrack];
/* generate all the samples */
- for (i = 0; i < sizeBuffer; i++)
+ for(i = 0; i < sizeBuffer; i++)
{
fInstMag += fMagIncr;
fInstFreq += fFreqIncr;
fInstPhase += fInstFreq;
-
pFBuffer[i] += sms_dBToMag(fInstMag) * sms_sine(fInstPhase);
}
/* save current values into last values */
pLastFrame->pFSinFreq[iTrack] = fFreq;
pLastFrame->pFSinAmp[iTrack] = fMag;
- pLastFrame->pFSinPha[iTrack] = fInstPhase -
- floor(fInstPhase / TWO_PI) * TWO_PI;
+ pLastFrame->pFSinPha[iTrack] = fInstPhase - floor(fInstPhase / TWO_PI) * TWO_PI;
}
/*! \brief generate all the sinusoids for a given frame
@@ -191,4 +188,3 @@ void sms_sineSynthFrame(SMS_Data *pSmsData, sfloat *pFBuffer,
}
}
}
-
diff --git a/sms/sms.c b/sms/sms.c
index 27b6724..8501d4d 100644
--- a/sms/sms.c
+++ b/sms/sms.c
@@ -319,7 +319,7 @@ int sms_initAnalysis(SMS_AnalParams *pAnalParams)
}
/* memory for residual */
- pAnalParams->residualParams.residualSize = pAnalParams->sizeHop * 2;
+ pAnalParams->residualParams.hopSize = pAnalParams->sizeHop;
sms_initResidual(&pAnalParams->residualParams);
/* memory for guide states */
@@ -467,15 +467,20 @@ int sms_initSynth(SMS_SynthParams *pSynthParams)
void sms_initResidualParams(SMS_ResidualParams *residualParams)
{
residualParams->samplingRate = 44100;
+ residualParams->hopSize = 256;
residualParams->residualSize = 0;
residualParams->residual = NULL;
- residualParams->residualWindow = NULL;
+ residualParams->fftWindow = NULL;
+ residualParams->ifftWindow = NULL;
+ residualParams->windowScale = 0.0;
residualParams->residualMag = 0.0;
residualParams->originalMag = 0.0;
residualParams->nCoeffs = 128;
residualParams->stocCoeffs = NULL;
residualParams->sizeStocMagSpectrum = 0;
residualParams->stocMagSpectrum = NULL;
+ residualParams->stocPhaseSpectrum = NULL;
+ residualParams->approx = NULL;
residualParams->approxEnvelope = NULL;
int i;
for(i = 0; i < SMS_MAX_SPEC; i++)
@@ -492,13 +497,14 @@ void sms_initResidualParams(SMS_ResidualParams *residualParams)
*/
int sms_initResidual(SMS_ResidualParams *residualParams)
{
- if(residualParams->residualSize <= 0)
+ if(residualParams->hopSize <= 0)
{
- sms_error("Residual size must be a positive integer");
+ sms_error("Residual hop size must be a positive integer");
return -1;
}
/* residual signal */
+ residualParams->residualSize = residualParams->hopSize * 2;
residualParams->residual = (sfloat *)calloc(residualParams->residualSize, sizeof(sfloat));
if(residualParams->residual == NULL)
{
@@ -506,15 +512,34 @@ int sms_initResidual(SMS_ResidualParams *residualParams)
return -1;
}
- /* residual window */
- residualParams->residualWindow = (sfloat *)calloc(residualParams->residualSize, sizeof(sfloat));
- if(residualParams->residualWindow == NULL)
+ /* residual fft/ifft windows */
+ residualParams->fftWindow = (sfloat *)calloc(residualParams->residualSize, sizeof(sfloat));
+ if(residualParams->fftWindow == NULL)
{
- sms_error("Could not allocate memory for residualWindow");
+ sms_error("Could not allocate memory for residual FFT window");
return -1;
}
- sms_getWindow(residualParams->residualSize, residualParams->residualWindow, SMS_WIN_HAMMING);
- sms_scaleWindow(residualParams->residualSize, residualParams->residualWindow);
+ sms_getWindow(residualParams->residualSize, residualParams->fftWindow, SMS_WIN_BH_70);
+ sms_scaleWindow(residualParams->residualSize, residualParams->fftWindow);
+
+ residualParams->ifftWindow = (sfloat *)calloc(residualParams->residualSize, sizeof(sfloat));
+ if(residualParams->ifftWindow == NULL)
+ {
+ sms_error("Could not allocate memory for residual IFFT window");
+ return -1;
+ }
+ sms_getWindow(residualParams->residualSize, residualParams->ifftWindow, SMS_WIN_HANNING);
+ /* compute IFFT window scaling:
+ * windows per hop = hop size / window size = 0.5
+ * overlap = 50% => 1 window total in each hop/frame
+ * => windowScale = window size / sum(window samples)
+ * (about 1.85 for a 1024-sample Hamming window; the window built above is Hanning)
+ */
+ int i;
+ sfloat sum = 0.0;
+ for(i = 0; i < residualParams->residualSize; i++)
+ sum += residualParams->ifftWindow[i];
+ residualParams->windowScale = (sfloat)residualParams->residualSize / sum;
/* stochastic analysis */
residualParams->stocCoeffs = (sfloat *)calloc(residualParams->nCoeffs, sizeof(sfloat));
@@ -531,7 +556,19 @@ int sms_initResidual(SMS_ResidualParams *residualParams)
sms_error("Could not allocate memory for stochastic magnitude spectrum");
return -1;
}
+ residualParams->stocPhaseSpectrum = (sfloat *)calloc(residualParams->sizeStocMagSpectrum, sizeof(sfloat));
+ if(residualParams->stocPhaseSpectrum == NULL)
+ {
+ sms_error("Could not allocate memory for stochastic magnitude spectrum");
+ return -1;
+ }
+ residualParams->approx = (sfloat *)calloc(residualParams->residualSize, sizeof(sfloat));
+ if(residualParams->approx == NULL)
+ {
+ sms_error("Could not allocate memory for spectral approximation");
+ return -1;
+ }
residualParams->approxEnvelope = (sfloat *)calloc(residualParams->nCoeffs, sizeof(sfloat));
if(residualParams->approxEnvelope == NULL)
{
@@ -553,19 +590,28 @@ void sms_freeResidual(SMS_ResidualParams *residualParams)
{
if(residualParams->residual)
free(residualParams->residual);
- if(residualParams->residualWindow)
- free(residualParams->residualWindow);
+ if(residualParams->fftWindow)
+ free(residualParams->fftWindow);
+ if(residualParams->ifftWindow)
+ free(residualParams->ifftWindow);
if(residualParams->stocCoeffs)
free(residualParams->stocCoeffs);
if(residualParams->stocMagSpectrum)
free(residualParams->stocMagSpectrum);
+ if(residualParams->stocPhaseSpectrum)
+ free(residualParams->stocPhaseSpectrum);
+ if(residualParams->approx)
+ free(residualParams->approx);
if(residualParams->approxEnvelope)
free(residualParams->approxEnvelope);
residualParams->residual = NULL;
- residualParams->residualWindow = NULL;
+ residualParams->fftWindow = NULL;
+ residualParams->ifftWindow = NULL;
residualParams->stocCoeffs = NULL;
residualParams->stocMagSpectrum = NULL;
+ residualParams->stocPhaseSpectrum = NULL;
+ residualParams->approx = NULL;
residualParams->approxEnvelope = NULL;
}
diff --git a/sms/sms.h b/sms/sms.h
index 7e80ee4..0613ab3 100644
--- a/sms/sms.h
+++ b/sms/sms.h
@@ -196,15 +196,20 @@ typedef struct
typedef struct
{
int samplingRate;
+ int hopSize;
int residualSize;
sfloat *residual;
- sfloat *residualWindow;
+ sfloat *fftWindow;
+ sfloat *ifftWindow;
+ sfloat windowScale;
sfloat residualMag;
sfloat originalMag;
int nCoeffs;
sfloat *stocCoeffs;
int sizeStocMagSpectrum;
sfloat *stocMagSpectrum;
+ sfloat *stocPhaseSpectrum;
+ sfloat *approx;
sfloat *approxEnvelope;
sfloat fftBuffer[SMS_MAX_SPEC * 2];
} SMS_ResidualParams;
@@ -589,13 +594,17 @@ void sms_arrayScalarTempered(int sizeArray, sfloat *pArray);
/* function declarations */
void sms_setPeaks(SMS_AnalParams *pAnalParams, int numamps, sfloat* amps,
int numfreqs, sfloat* freqs, int numphases, sfloat* phases);
-int sms_findPeaks(int sizeWaveform, sfloat *pWaveform, SMS_AnalParams *pAnalParams, SMS_SpectralPeaks *pSpectralPeaks);
+int sms_findPeaks(int sizeWaveform, sfloat *pWaveform,
+ SMS_AnalParams *pAnalParams, SMS_SpectralPeaks *pSpectralPeaks);
int sms_findPartials(SMS_Data *pSmsFrame, SMS_AnalParams *pAnalParams);
int sms_findResidual(int sizeSynthesis, sfloat* pSynthesis,
int sizeOriginal, sfloat* pOriginal,
SMS_ResidualParams *residualParams);
-void sms_approxResidual(SMS_ResidualParams *residualParams);
-int sms_analyze(int sizeWaveform, sfloat *pWaveform, SMS_Data *pSmsData, SMS_AnalParams *pAnalParams);
+void sms_approxResidual(int sizeResidual, sfloat* residual,
+ int sizeApprox, sfloat* approx,
+ SMS_ResidualParams *residualParams);
+int sms_analyze(int sizeWaveform, sfloat *pWaveform, SMS_Data *pSmsData,
+ SMS_AnalParams *pAnalParams);
void sms_analyzeFrame(int iCurrentFrame, SMS_AnalParams *pAnalParams, sfloat fRefFundamental);
int sms_init();
@@ -615,6 +624,8 @@ void sms_getWindow(int sizeWindow, sfloat *pWindow, int iWindowType);
void sms_scaleWindow(int sizeWindow, sfloat *pWindow);
int sms_spectrum(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag,
sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer);
+int sms_spectrumW(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag,
+ sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer);
int sms_invSpectrum(int sizeWaveform, sfloat *pWaveform, sfloat *pWindow ,
int sizeMag, sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer);
/* \todo remove this once invSpectrum is completely implemented */
diff --git a/sms/sms.i b/sms/sms.i
index afde07a..0b3ee5a 100644
--- a/sms/sms.i
+++ b/sms/sms.i
@@ -31,7 +31,6 @@
%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeAmp, double* pAmp)};
%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeMag, double* pMag)};
%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizePhase, double* pPhase)};
-%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeRes, double* pRes)};
%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeCepstrum, double* pCepstrum)};
%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeEnv, double* pEnv)};
%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeTrack, double* pTrack)};
@@ -39,6 +38,11 @@
%apply(int DIM1, double* IN_ARRAY1) {(int sizeInArray, double* pInArray)};
%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeOutArray, double* pOutArray)};
%apply(int DIM1, double* INPLACE_ARRAY1) {(int sizeHop, double* pSynthesis)};
+%apply(int DIM1, double* INPLACE_ARRAY1)
+{
+ (int sizeResidual, double* residual),
+ (int sizeApprox, double* approx)
+}
%apply(int DIM1, double* IN_ARRAY1)
{
(int numamps, double* amps),
@@ -504,26 +508,15 @@
{
void getResidual(int sizeArray, sfloat *pArray)
{
- if(sizeArray < $self->residualSize)
+ if(sizeArray < $self->hopSize)
{
sms_error("numpy array not big enough");
return;
}
int i;
- for(i = 0; i < $self->residualSize; i++)
+ for(i = 0; i < $self->hopSize; i++)
pArray[i] = $self->residual[i];
}
- void getApprox(int sizeArray, sfloat *pArray)
- {
- if(sizeArray < $self->nCoeffs)
- {
- sms_error("numpy array not big enough");
- return;
- }
- int i;
- for(i = 0; i < $self->nCoeffs; i++)
- pArray[i] = $self->approxEnvelope[i];
- }
}
%extend SMS_ModifyParams
diff --git a/sms/spectrum.c b/sms/spectrum.c
index 666b42d..1b8e053 100644
--- a/sms/spectrum.c
+++ b/sms/spectrum.c
@@ -37,7 +37,6 @@ int sms_spectrum(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag
sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer)
{
int i, it2;
- int err = 0;
sfloat fReal, fImag;
int sizeFft = sizeMag << 1;
@@ -55,6 +54,38 @@ int sms_spectrum(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag
fImag = pFftBuffer[it2 + 1]; /*even numbers 2->N+2 */
pMag[i] = sqrt(fReal * fReal + fImag * fImag);
pPhase[i] = atan2(-fImag, fReal); /* \todo why is fImag negated? */
+ /*pPhase[i] = atan2(fImag, fReal);*/
+ }
+
+ return sizeFft;
+}
+
+/* Same as sms_spectrum, but without zero-phase windowing, and with the phase
+ * calculated as atan2(imag, real) instead of atan2(-imag, real).
+ */
+int sms_spectrumW(int sizeWindow, sfloat *pWaveform, sfloat *pWindow, int sizeMag,
+ sfloat *pMag, sfloat *pPhase, sfloat *pFftBuffer)
+{
+ int i, it2;
+ sfloat fReal, fImag;
+
+ int sizeFft = sizeMag << 1;
+ memset(pFftBuffer, 0, sizeFft * sizeof(sfloat));
+
+ /* apply window to waveform */
+ for(i = 0; i < sizeWindow; i++)
+ pFftBuffer[i] = pWaveform[i] * pWindow[i];
+
+ sms_fft(sizeFft, pFftBuffer);
+
+ /* convert from rectangular to polar coordinates */
+ for(i = 0; i < sizeMag; i++)
+ {
+ it2 = i << 1; //even numbers 0-N
+ fReal = pFftBuffer[it2]; /*odd numbers 1->N+1 */
+ fImag = pFftBuffer[it2 + 1]; /*even numbers 2->N+2 */
+ pMag[i] = sqrt(fReal * fReal + fImag * fImag);
+ pPhase[i] = atan2(fImag, fReal);
}
return sizeFft;
@@ -81,8 +112,8 @@ int sms_spectrumMag(int sizeWindow, sfloat *pWaveform, sfloat *pWindow,
sfloat fReal, fImag;
/* apply window to waveform, zero the rest of the array */
- for (i = 0; i < sizeWindow; i++)
- pFftBuffer[i] = pWindow[i] * pWaveform[i];
+ for(i = 0; i < sizeWindow; i++)
+ pFftBuffer[i] = pWaveform[i] * pWindow[i];
for(i = sizeWindow; i < sizeFft; i++)
pFftBuffer[i] = 0.;
@@ -90,7 +121,7 @@ int sms_spectrumMag(int sizeWindow, sfloat *pWaveform, sfloat *pWindow,
sms_fft(sizeFft, pFftBuffer);
/* convert from rectangular to polar coordinates */
- for (i=0; i<sizeMag; i++)
+ for(i = 0; i < sizeMag; i++)
{
it2 = i << 1;
fReal = pFftBuffer[it2];
@@ -123,7 +154,7 @@ int sms_invSpectrum(int sizeWaveform, sfloat *pWaveform, sfloat *pWindow,
sms_PolarToRect(sizeMag, pFftBuffer, pMag, pPhase);
sms_ifft(sizeFft, pFftBuffer);
- /* assume the output array has been taken care off */
+ /* assume that the output array does not need to be cleared */
/* before, this was multiplied by .5, why? */
for(i = 0; i < sizeWaveform; i++)
//pWaveform[i] += pFftBuffer[i] * pWindow[i];
@@ -145,25 +176,21 @@ int sms_invQuickSpectrumW(sfloat *pFMagSpectrum, sfloat *pFPhaseSpectrum,
int sizeFft, sfloat *pFWaveform, int sizeWave,
sfloat *pFWindow, sfloat* pFftBuffer)
{
- int sizeMag = sizeFft >> 1, i, it2;
- sfloat fPower;
+ int i, it2;
+ int sizeMag = sizeFft >> 1;
- /* convert from polar coordinates to rectangular */
- for(i = 0; i<sizeMag; i++)
+ /* convert from polar coordinates to rectangular */
+ for(i = 0; i < sizeMag; i++)
{
it2 = i << 1;
- fPower = pFMagSpectrum[i];
- pFftBuffer[it2] = fPower * cos (pFPhaseSpectrum[i]);
- pFftBuffer[it2+1] = fPower * sin (pFPhaseSpectrum[i]);
+ pFftBuffer[it2] = pFMagSpectrum[i] * cos(pFPhaseSpectrum[i]);
+ pFftBuffer[it2+1] = pFMagSpectrum[i] * sin(pFPhaseSpectrum[i]);
}
/* compute IFFT */
sms_ifft(sizeFft, pFftBuffer);
- /* assume the output array has been taken care off */
- /* \todo is a seperate pFftBuffer necessary here?
- it seems like multiplying the window into the waveform
- would be fine, without pFftBuffer */
+ /* assume that the output array does not need to be cleared */
for(i = 0; i < sizeWave; i++)
pFWaveform[i] += (pFftBuffer[i] * pFWindow[i] * .5);
diff --git a/sms/synthesis.c b/sms/synthesis.c
index 9523ee6..bc739d0 100644
--- a/sms/synthesis.c
+++ b/sms/synthesis.c
@@ -120,14 +120,12 @@ static int StocSynthApprox(SMS_Data *pSmsData, SMS_SynthParams *pSynthParams)
int sizeSpec1 = pSmsData->nCoeff;
int sizeSpec2 = pSynthParams->sizeHop;
int sizeFft = pSynthParams->sizeHop << 1; /* 50% overlap, so sizeFft is 2x sizeHop */
- sfloat fStocGain;
/* if no gain or no coefficients return */
- if (*(pSmsData->pFStocGain) <= 0)
+ if(*(pSmsData->pFStocGain) <= 0)
return 0;
- sizeSpec1Used = sizeSpec1 * pSynthParams->iSamplingRate /
- pSynthParams->iOriginalSRate;
+ sizeSpec1Used = sizeSpec1 * pSynthParams->iSamplingRate / pSynthParams->iOriginalSRate;
/* sizeSpec1Used cannot be more than what is available \todo check by graph */
if(sizeSpec1Used > sizeSpec1) sizeSpec1Used = sizeSpec1;
@@ -150,74 +148,79 @@ static int StocSynthApprox(SMS_Data *pSmsData, SMS_SynthParams *pSynthParams)
*
* \param residualParams Parameters and memory for residual synthesis
*/
-void sms_approxResidual(SMS_ResidualParams *residualParams)
+void sms_approxResidual(int sizeResidual, sfloat* residual,
+ int sizeApprox, sfloat* approx,
+ SMS_ResidualParams *residualParams)
{
- /* filter residual with a high pass filter */
- sms_filterHighPass(residualParams->residualSize,
- residualParams->residual,
- residualParams->samplingRate);
-
- sms_spectrumMag(residualParams->residualSize,
- residualParams->residual,
- residualParams->residualWindow,
- residualParams->sizeStocMagSpectrum,
- residualParams->stocMagSpectrum,
- residualParams->fftBuffer);
-
- sms_spectralApprox(residualParams->stocMagSpectrum,
- residualParams->sizeStocMagSpectrum,
- residualParams->sizeStocMagSpectrum,
- residualParams->stocCoeffs,
- residualParams->nCoeffs,
- residualParams->nCoeffs,
- residualParams->approxEnvelope);
-
- /* get energy of spectrum */
int i;
- sfloat fMag = 0.0;
- for(i = 0; i < residualParams->sizeStocMagSpectrum; i++)
- fMag += (residualParams->stocMagSpectrum[i] * pAnalParams->stocMagSpectrum[i]);
- /* if no gain or no coefficients return */
- sfloat stocGain = fMag / residualParams->sizeStocMagSpectrum;
- if(stocGain <= 0)
- return;
+ /* shift buffers */
+ memcpy(residualParams->residual,
+ residualParams->residual + residualParams->hopSize,
+ sizeof(sfloat) * residualParams->hopSize);
+ memcpy(residualParams->residual + residualParams->hopSize, residual,
+ sizeof(sfloat) * residualParams->hopSize);
- int i, sizeSpec1Used;
- int sizeSpec1 = residualParams->nCoeffs;
- /*int sizeSpec2 = pSynthParams->sizeHop;*/
- int sizeSpec2 = residualParams->residualSize;
- int sizeFft = sizeSpec2 << 1; /* 50% overlap, so sizeFft is 2x sizeHop */
+ memcpy(residualParams->approx,
+ residualParams->approx + residualParams->hopSize,
+ sizeof(sfloat) * residualParams->hopSize);
+ memset(residualParams->approx + residualParams->hopSize, 0,
+ sizeof(sfloat) * residualParams->hopSize);
- /*sizeSpec1Used = sizeSpec1 * pSynthParams->iSamplingRate / pSynthParams->iOriginalSRate;*/
+ sms_spectrumMag(residualParams->residualSize,
+ residualParams->residual,
+ residualParams->fftWindow,
+ residualParams->sizeStocMagSpectrum,
+ residualParams->stocMagSpectrum,
+ residualParams->fftBuffer);
+
+ if(residualParams->sizeStocMagSpectrum != residualParams->nCoeffs)
+ {
+ sms_spectralApprox(residualParams->stocMagSpectrum,
+ residualParams->sizeStocMagSpectrum,
+ residualParams->sizeStocMagSpectrum,
+ residualParams->stocCoeffs,
+ residualParams->nCoeffs,
+ residualParams->nCoeffs,
+ residualParams->approxEnvelope);
- /*[> sizeSpec1Used cannot be more than what is available \todo check by graph <]*/
- /*if(sizeSpec1Used > sizeSpec1) sizeSpec1Used = sizeSpec1;*/
+ sms_spectralApprox(residualParams->stocCoeffs,
+ residualParams->nCoeffs,
+ residualParams->nCoeffs,
+ residualParams->stocMagSpectrum,
+ residualParams->sizeStocMagSpectrum,
+ residualParams->sizeStocMagSpectrum,
+ residualParams->approxEnvelope);
+ }
- /*sms_spectralApprox(pSmsData->pFStocCoeff, sizeSpec1, sizeSpec1Used,*/
- /* pSynthParams->pMagBuff, sizeSpec2, sizeSpec1Used,*/
- /* pSynthParams->approxEnvelope);*/
+ /* generate random phases */
+ for(i = 0; i < residualParams->sizeStocMagSpectrum; i++)
+ residualParams->stocPhaseSpectrum[i] = TWO_PI * sms_random();
- /*[> generate random phases <]*/
- /*for(i = 0; i < sizeSpec2; i++)*/
- /* pSynthParams->pPhaseBuff[i] = TWO_PI * sms_random();*/
+ /* IFFT with 50% overlap */
+ sms_invQuickSpectrumW(residualParams->stocMagSpectrum,
+ residualParams->stocPhaseSpectrum,
+ residualParams->sizeStocMagSpectrum*2,
+ residualParams->approx,
+ residualParams->residualSize,
+ residualParams->ifftWindow,
+ residualParams->fftBuffer);
- /*sms_invQuickSpectrumW(pSynthParams->pMagBuff, pSynthParams->pPhaseBuff,*/
- /* sizeFft, pSynthParams->pSynthBuff, sizeFft,*/
- /* pSynthParams->pFStocWindow, pSynthParams->pSpectra);*/
+ /* output */
+ for(i = 0; i < sizeApprox; i++)
+ approx[i] = residualParams->approx[i] * residualParams->windowScale;
}
/*! \brief synthesizes one frame of SMS data
*
- * \param pSmsData input SMS data
- * \param pFSynthesis output sound buffer
- * \param pSynthParams synthesis parameters
+ * \param pSmsData input SMS data
+ * \param pFSynthesis output sound buffer
+ * \param pSynthParams synthesis parameters
*/
void sms_synthesize(SMS_Data *pSmsData, sfloat *pFSynthesis, SMS_SynthParams *pSynthParams)
{
- int i, k;
+ int i;
int sizeHop = pSynthParams->sizeHop;
- int sizeFft = sizeHop << 1;
memcpy(pSynthParams->pSynthBuff, (sfloat *)(pSynthParams->pSynthBuff+sizeHop),
sizeof(sfloat) * sizeHop);
diff --git a/tests/sms.py b/tests/sms.py
index b09e692..ca51f3f 100644
--- a/tests/sms.py
+++ b/tests/sms.py
@@ -24,10 +24,8 @@ from nose.tools import assert_almost_equals
class TestSimplSMS(object):
FLOAT_PRECISION = 2 # number of decimal places to check for accuracy
input_file = 'audio/flute.wav'
- frame_size = 2048
hop_size = 512
num_frames = 50
- #num_samples = frame_size + ((num_frames - 1) * hop_size)
num_samples = num_frames * hop_size
max_peaks = 10
max_partials = 10
@@ -55,6 +53,7 @@ class TestSimplSMS(object):
analysis_params.minGoodFrames = 1
analysis_params.iCleanTracks = 0
analysis_params.iStochasticType = pysms.SMS_STOC_NONE
+ analysis_params.preEmphasis = 0
return analysis_params
def simplsms_analysis_params(self, sampling_rate):
@@ -74,6 +73,7 @@ class TestSimplSMS(object):
analysis_params.minGoodFrames = 1
analysis_params.iCleanTracks = 0
analysis_params.iStochasticType = simplsms.SMS_STOC_NONE
+ analysis_params.preEmphasis = 0
return analysis_params
def pysms_synthesis_params(self, sampling_rate):
@@ -84,6 +84,7 @@ class TestSimplSMS(object):
synth_params.iStochasticType = pysms.SMS_STOC_NONE
synth_params.sizeHop = self.hop_size
synth_params.nTracks = self.max_peaks
+ synth_params.deEmphasis = 0
return synth_params
def test_size_next_read(self):
@@ -112,13 +113,13 @@ class TestSimplSMS(object):
while current_frame < self.num_frames:
sms_next_read_sizes.append(analysis_params.sizeNextRead)
sample_offset += pysms_size_new_data
- if((sample_offset + analysis_params.sizeNextRead) < self.num_samples):
- pysms_size_new_data = analysis_params.sizeNextRead
- else:
- pysms_size_new_data = self.num_samples - sample_offset
+ pysms_size_new_data = analysis_params.sizeNextRead
# convert frame to floats for libsms
frame = audio[sample_offset:sample_offset + pysms_size_new_data]
frame = np.array(frame, dtype=np.float32)
+ if len(frame) < pysms_size_new_data:
+ frame = np.hstack((frame, np.zeros(pysms_size_new_data - len(frame),
+ dtype=np.float32)))
analysis_data = pysms.SMS_Data()
pysms.sms_allocFrameH(sms_header, analysis_data)
status = pysms.sms_analyze(frame, analysis_data, analysis_params)
@@ -141,6 +142,7 @@ class TestSimplSMS(object):
while current_frame < self.num_frames:
pd.frame_size = pd.get_next_frame_size()
+ #print current_frame, sms_next_read_sizes[current_frame], pd.frame_size
assert sms_next_read_sizes[current_frame] == pd.frame_size
frame = simpl.Frame()
frame.size = pd.frame_size
@@ -181,6 +183,9 @@ class TestSimplSMS(object):
frame.size = size_new_data
frame.audio = np.array(audio[sample_offset:sample_offset + size_new_data],
dtype=np.float32)
+ if len(frame.audio) < size_new_data:
+ frame.audio = np.hstack((frame.audio, np.zeros(size_new_data - len(frame.audio),
+ dtype=np.float32)))
analysis_data = pysms.SMS_Data()
pysms.sms_allocFrameH(sms_header, analysis_data)
status = pysms.sms_analyze(frame.audio, analysis_data, analysis_params)
@@ -242,6 +247,8 @@ class TestSimplSMS(object):
frame = simpl.Frame()
frame.size = size_new_data
frame.audio = audio[sample_offset:sample_offset + size_new_data]
+ if len(frame.audio) < size_new_data:
+ frame.audio = np.hstack((frame.audio, simpl.zeros(size_new_data - len(frame.audio))))
analysis_data = simplsms.SMS_Data()
simplsms.sms_allocFrameH(simpl_sms_header, analysis_data)
status = simplsms.sms_analyze(frame.audio, analysis_data, simpl_analysis_params)
@@ -483,6 +490,8 @@ class TestSimplSMS(object):
sample_offset += size_new_data
size_new_data = analysis_params.sizeNextRead
frame = audio[sample_offset:sample_offset + size_new_data]
+ if len(frame) < size_new_data:
+ frame = np.hstack((frame, simpl.zeros(size_new_data - len(frame))))
analysis_data = simplsms.SMS_Data()
simplsms.sms_allocFrameH(sms_header, analysis_data)
status = simplsms.sms_analyze(frame, analysis_data, analysis_params)
@@ -526,6 +535,8 @@ class TestSimplSMS(object):
frame = simpl.Frame()
frame.size = pd.frame_size
frame.audio = audio[sample_offset:sample_offset + pd.frame_size]
+ if len(frame.audio) < pd.frame_size:
+ frame.audio = np.hstack((frame.audio, simpl.zeros(pd.frame_size - len(frame.audio))))
simpl_peaks.append(pd.find_peaks_in_frame(frame))
sample_offset += pd.frame_size
current_frame += 1
@@ -764,30 +775,27 @@ class TestSimplSMS(object):
p.frequency = sms_freqs[i]
p.phase = sms_phases[i]
peaks.append(p)
- else:
- for i in range(num_partials):
- p = simpl.Peak()
- p.amplitude = 0.0
- p.frequency = 0.0
- p.phase = 0.0
- peaks.append(p)
+ frame.partials = peaks
+ sms_frames.append(frame)
+ current_frame += 1
if status == -1:
do_analysis = False
- frame.partials = peaks
- sms_frames.append(frame)
pysms.sms_freeFrame(analysis_data)
- current_frame += 1
+ # first frame is blank
+ sms_frames = sms_frames[1:]
+
+ # free sms memory
pysms.sms_freeAnalysis(analysis_params)
pysms.sms_closeSF()
pysms.sms_free()
pd = simpl.SMSPeakDetection()
pd.max_peaks = self.max_peaks
- pd.hop_size = self.hop_size
- peaks = pd.find_peaks(audio)[0:self.num_frames]
+ pd.hop_size = self.hop_size
+ peaks = pd.find_peaks(audio)
pt = simpl.SMSPartialTracking()
pt.max_partials = self.max_partials
simpl_frames = pt.find_partials(peaks)
@@ -1023,10 +1031,19 @@ class TestSimplSMS(object):
analysis_data = pysms.SMS_Data()
pysms.sms_allocFrameH(sms_header, analysis_data)
status = pysms.sms_analyze(frame, analysis_data, analysis_params)
- analysis_frames.append(analysis_data)
- if status == -1:
+ if status == 1:
+ analysis_frames.append(analysis_data)
+ current_frame += 1
+ elif status == 0:
+ pysms.sms_freeFrame(analysis_data)
+ elif status == -1:
do_analysis = False
- current_frame += 1
+ pysms.sms_freeFrame(analysis_data)
+
+ # remove the first frame, it's blank
+ blank_frame = analysis_frames[0]
+ analysis_frames = analysis_frames[1:]
+ pysms.sms_freeFrame(blank_frame)
synth_params = self.pysms_synthesis_params(sampling_rate)
pysms.sms_initSynth(sms_header, synth_params)
@@ -1050,15 +1067,14 @@ class TestSimplSMS(object):
pd = simpl.SMSPeakDetection()
pd.max_peaks = self.max_peaks
pd.hop_size = self.hop_size
- peaks = pd.find_peaks(audio)[0:self.num_frames]
+ peaks = pd.find_peaks(audio)
pt = simpl.SMSPartialTracking()
pt.max_partials = self.max_partials
partials = pt.find_partials(peaks)
synth = simpl.SMSSynthesis()
synth.hop_size = self.hop_size
synth.max_partials = self.max_partials
- synth.stochastic_type = simplsms.SMS_STOC_NONE
- synth.synthesis_type = simplsms.SMS_STYPE_DET
+ synth.det_synthesis_type = simplsms.SMS_DET_IFFT
simpl_audio = synth.synth(partials)
assert len(sms_audio) == len(simpl_audio)
@@ -1066,7 +1082,7 @@ class TestSimplSMS(object):
assert_almost_equals(sms_audio[i], simpl_audio[i], self.FLOAT_PRECISION)
def test_harmonic_synthesis_sin(self):
- """test_harmonic_synthesis
+ """test_harmonic_synthesis_sin
Compare pysms synthesised harmonic component with SMS synthesised
harmonic component."""
audio, sampling_rate = self.get_audio()
@@ -1076,10 +1092,10 @@ class TestSimplSMS(object):
if(pysms.sms_openSF(self.input_file, snd_header)):
raise NameError("error opening sound file: " + pysms.sms_errorString())
analysis_params = self.pysms_analysis_params(sampling_rate)
- analysis_params.nFrames = self.num_frames
if pysms.sms_initAnalysis(analysis_params, snd_header) != 0:
raise Exception("Error allocating memory for analysis_params")
analysis_params.iSizeSound = self.num_samples
+ analysis_params.nFrames = self.num_frames
sms_header = pysms.SMS_Header()
pysms.sms_fillHeader(sms_header, analysis_params, "pysms")
@@ -1095,16 +1111,28 @@ class TestSimplSMS(object):
frame = audio[sample_offset:sample_offset + size_new_data]
# convert frame to floats for libsms
frame = np.array(frame, dtype=np.float32)
+ if len(frame) < size_new_data:
+ frame = np.hstack((frame, np.zeros(size_new_data - len(frame),
+ dtype=np.float32)))
analysis_data = pysms.SMS_Data()
pysms.sms_allocFrameH(sms_header, analysis_data)
status = pysms.sms_analyze(frame, analysis_data, analysis_params)
- analysis_frames.append(analysis_data)
- if status == -1:
+ if status == 1:
+ analysis_frames.append(analysis_data)
+ current_frame += 1
+ elif status == 0:
+ pysms.sms_freeFrame(analysis_data)
+ elif status == -1:
do_analysis = False
- current_frame += 1
+ pysms.sms_freeFrame(analysis_data)
+
+ # remove the first frame, it's blank
+ blank_frame = analysis_frames[0]
+ analysis_frames = analysis_frames[1:]
+ pysms.sms_freeFrame(blank_frame)
synth_params = self.pysms_synthesis_params(sampling_rate)
- synth_params.iDetSynthesisType = pysms.SMS_DET_SIN
+ synth_params.iDetSynthType = pysms.SMS_DET_SIN
pysms.sms_initSynth(sms_header, synth_params)
synth_samples = np.zeros(synth_params.sizeHop, dtype=np.float32)
@@ -1126,14 +1154,13 @@ class TestSimplSMS(object):
pd = simpl.SMSPeakDetection()
pd.max_peaks = self.max_peaks
pd.hop_size = self.hop_size
- peaks = pd.find_peaks(audio)[0:self.num_frames]
+ peaks = pd.find_peaks(audio)
pt = simpl.SMSPartialTracking()
pt.max_partials = self.max_partials
partials = pt.find_partials(peaks)
synth = simpl.SMSSynthesis()
synth.hop_size = self.hop_size
synth.max_partials = self.max_partials
- synth.stochastic_type = simplsms.SMS_STOC_NONE
synth.det_synthesis_type = simplsms.SMS_DET_SIN
simpl_audio = synth.synth(partials)
@@ -1144,6 +1171,13 @@ class TestSimplSMS(object):
def test_residual_synthesis(self):
"""test_residual_synthesis
Compare pysms residual signal with SMS residual"""
+
+ # -------------------------------------------
+ # This test is not finished yet. Skip for now
+ from nose.plugins.skip import SkipTest
+ raise SkipTest
+ # -------------------------------------------
+
audio, sampling_rate = self.get_audio()
pysms.sms_init()
snd_header = pysms.SMS_SndHeader()
@@ -1154,7 +1188,6 @@ class TestSimplSMS(object):
analysis_params.nFrames = self.num_frames
analysis_params.nStochasticCoeff = 128
analysis_params.iStochasticType = pysms.SMS_STOC_APPROX
- analysis_params.preEmphasis = 0
if pysms.sms_initAnalysis(analysis_params, snd_header) != 0:
raise Exception("Error allocating memory for analysis_params")
analysis_params.iSizeSound = self.num_samples
@@ -1176,13 +1209,6 @@ class TestSimplSMS(object):
analysis_data = pysms.SMS_Data()
pysms.sms_allocFrameH(sms_header, analysis_data)
status = pysms.sms_analyze(frame, analysis_data, analysis_params)
- #if status == 1:
- # analysis_frames.append(analysis_data)
- #elif status == -1:
- # do_analysis = False
- # pysms.sms_freeFrame(analysis_data)
- #else:
- # pysms.sms_freeFrame(analysis_data)
analysis_frames.append(analysis_data)
if status == -1:
do_analysis = False
@@ -1192,7 +1218,6 @@ class TestSimplSMS(object):
synth_params = self.pysms_synthesis_params(sampling_rate)
synth_params.iStochasticType = pysms.SMS_STOC_APPROX
synth_params.iSynthesisType = pysms.SMS_STYPE_STOC
- synth_params.deEmphasis = 0
pysms.sms_initSynth(sms_header, synth_params)
synth_samples = np.zeros(synth_params.sizeHop, dtype=np.float32)
sms_residual = np.array([], dtype=np.float32)
@@ -1235,7 +1260,7 @@ if __name__ == "__main__":
# useful for debugging, particularly with GDB
import nose
argv = [__file__,
+ "--nocapture",
#__file__ + ":TestSimplSMS.test_residual_synthesis"]
- __file__ + ":TestSimplSMS.test_sms_analyze"]
+ __file__ + ":TestSimplSMS.test_harmonic_synthesis_sin"]
nose.run(argv=argv)
-