aboutsummaryrefslogtreecommitdiff
path: root/site/udo/mfcc_match.udo
blob: a87df40a96cee228354888e4f799a2f8bc1ab23c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#ifndef UDO_MFCCMATCH
#define UDO_MFCCMATCH ##

#include "/wavetables.udo"

; Default FFT size for MFCC analysis (lower = more CPU);
; used wherever an ifftsize argument is passed as -1
gimfm_default_fftsize = 1024

; Default number of MFCC bands (should be a power of two, ideally 8, 16 or 32);
; used wherever an imfccbands argument is passed as -1
gimfm_default_mfccbands = 16

; Default lower [0] and upper [1] frequencies of the range to analyse;
; used wherever ifreqmin / ifreqmax arguments are passed as -1
gimfm_default_freqrange[] fillarray 140, 19000


/*
 * Stop Csound with an error message unless ksmps is a power of two
 */
opcode _mfm_checkksmps, 0, 0
	; a power of two has no set bits in common with the number just below it
	isharedbits = ksmps & (ksmps - 1)
	if (isharedbits != 0) then
		prints "\n\nERROR: MFCC matching requires ksmps to be a power of two\n\n"
		exitnow
	endif
endop


/*
 * Euclidean distance between one frame of corpus table data and an array
 *	in:
 *		icorpusdata		Table containing MFCC corpus data
 *		kcorpusindex	Table index of the first value of the frame to compare
 *		kmatch[]		Array of MFCC values to compare against
 *		imfccbands		Number of consecutive values to compare
 *	out:
 *		kdistance		Square root of the summed squared differences
 */
opcode _mfm_euclideandistance, k, ikk[]i
	icorpusdata, kcorpusindex, kmatch[], imfccbands xin
	ksumsq = 0
	kband = 0
	until (kband >= imfccbands) do
		; corpus value for this band of the frame starting at kcorpusindex
		kstored tab kcorpusindex + kband, icorpusdata
		kdelta = kstored - kmatch[kband]
		ksumsq += pow(kdelta, 2)
		kband += 1
	od
	xout sqrt(ksumsq)
endop


/*
 * Get MFCC data from an audio signal
 *	All arguments are required; callers resolve any defaults before calling.
 *	in:
 *		asig		The audio signal for analysis
 *		ifreqmin	Minimum frequency for analysis (passed to mfb)
 *		ifreqmax	Maximum frequency for analysis (passed to mfb)
 *		ifftsize	Number of samples gathered before each analysis
 *		imfccbands	Number of bands requested from mfb
 *	out:
 *		kmfcc[]		Array of MFCC data (DCT of the log filter-bank output)
 *		ktrig		1 on the k-cycle in which new data has been output, otherwise 0
 */
opcode _mfm_getmfccs, k[]k, aiiii
	asig, ifreqmin, ifreqmax, ifftsize, imfccbands xin
	_mfm_checkksmps()
	kcnt init 0
	kIn[] init ifftsize
	kIn shiftin asig
	kcnt += ksmps
	ktrig = 0
	; kcnt advances in steps of ksmps and is tested for equality, so an
	; analysis only ever happens when ifftsize is a whole multiple of ksmps
	if (kcnt == ifftsize) then
		kFFT[] = rfft(kIn)
		kPows[] = pows(kFFT)
		kMFB[] = log(mfb(kPows, ifreqmin, ifreqmax, imfccbands), 0)
		kmfcc[] = dct(kMFB)
		kcnt = 0
		ktrig = 1
	endif
	xout kmfcc, ktrig
endop

/*
 * Get nearest matching table index of an audio signal based on MFCC analysis and distance comparison
 *	in:
 *		asig			The driving audio signal
 *		icorpusdata		Table containing MFCC corpus data
 *		ifreqmin=-1		Minimum analysis frequency; -1 uses gimfm_default_freqrange[0]
 *		ifreqmax=-1		Maximum analysis frequency; -1 uses gimfm_default_freqrange[1]
 *		ifftsize=-1		FFT size for MFCC analysis; -1 uses gimfm_default_fftsize
 *		imfccbands=-1	Number of MFCC bands to use; -1 uses gimfm_default_mfccbands
 *	out:
 *		kindex		Start index of corpus audio table that best matches
 *		ktrig		Fired when new match has been output
 */
opcode _mfm_nearest, kk, aijjjj
	asig, icorpusdata, ifreqmin, ifreqmax, ifftsize, imfccbands xin

	; the optional arguments arrive as -1 when omitted; resolve them here the
	; same way the public opcodes do so -1 is never used as a real setting
	ifreqmin = ((ifreqmin == -1) ? gimfm_default_freqrange[0]: ifreqmin)
	ifreqmax = ((ifreqmax == -1) ? gimfm_default_freqrange[1]: ifreqmax)
	ifftsize = ((ifftsize == -1) ? gimfm_default_fftsize : ifftsize)
	imfccbands = ((imfccbands == -1) ? gimfm_default_mfccbands : imfccbands)

	imaxitems = ftlen(icorpusdata)
	kmfcc[], ktrig _mfm_getmfccs asig, ifreqmin, ifreqmax, ifftsize, imfccbands
	if (ktrig == 1) then
		; linear search over the corpus, one frame (imfccbands values) at a time
		kcorpusindex = 0
		kbest = 9999999
		kbestindex = -1
		; NOTE(review): the strict '<' means the frame starting at
		; imaxitems - imfccbands is never compared - confirm this is intended
		while (kcorpusindex < imaxitems - imfccbands) do
			kdistance _mfm_euclideandistance icorpusdata, kcorpusindex, kmfcc, imfccbands
			if (kdistance < kbest) then
				kbest = kdistance
				kbestindex = kcorpusindex
			endif
			kcorpusindex += imfccbands
		od

	endif
	; convert the data-table index to a frame number, then to a sample
	; offset (each frame covers ifftsize samples)
	xout (kbestindex/imfccbands)*ifftsize, ktrig
endop


/*
 * Analyse a sound table and write its MFCC frames to a newly generated corpus data table
 *	in:
 *		ktimek			The analysis runs on the pass where this equals 1
 *						(presumably the output of timeinstk in the caller - confirm)
 *		ifn				Table containing the audio to analyse
 *		ifreqmin=-1		Minimum analysis frequency; -1 uses gimfm_default_freqrange[0]
 *		ifreqmax=-1		Maximum analysis frequency; -1 uses gimfm_default_freqrange[1]
 *		ifftsize=-1		FFT size for MFCC analysis; -1 uses gimfm_default_fftsize
 *		imfccbands=-1	Number of MFCC bands to use; -1 uses gimfm_default_mfccbands
 *		ifnmaxindex=-1	Number of table points of ifn to analyse; -1 uses ftlen(ifn)
 *		icorpustmpfn=-1	1 creates the data table with ftgentmp, any other value with ftgen
 *	out:
 *		kdone			1 on every pass where ktimek is not 1, otherwise 0
 *		icorpusdata		Number of the generated table holding the MFCC data
 */
opcode mfm_analysecorpus, ki, kijjjjjj
	ktimek, ifn, ifreqmin, ifreqmax, ifftsize, imfccbands, ifnmaxindex, icorpustmpfn xin
	
	; replace omitted (-1) arguments with the module defaults
	ifreqmin = ((ifreqmin == -1) ? gimfm_default_freqrange[0]: ifreqmin)
	ifreqmax = ((ifreqmax == -1) ? gimfm_default_freqrange[1]: ifreqmax)
	ifftsize = ((ifftsize == -1) ? gimfm_default_fftsize : ifftsize)
	imfccbands = ((imfccbands == -1) ? gimfm_default_mfccbands : imfccbands)
	ifnmaxindex = ((ifnmaxindex == -1) ? ftlen(ifn) : ifnmaxindex)

	; duration in seconds of the region to analyse
	ilen = ifnmaxindex / ftsr(ifn)
	; imfccbands values are stored for each ifftsize-sample frame
	imaxitems = imfccbands * (ifnmaxindex / ifftsize)
	if (icorpustmpfn == 1) then
		icorpusdata ftgentmp 0, 0, -imaxitems, 2, 0
	else
		icorpusdata ftgen 0, 0, -imaxitems, 2, 0
	endif
	;ktimek timeinstk

	kdone init 0
	if (ktimek == 1) then
		; number of k-cycles' worth of audio in the region
		kcycles = ilen*kr
		kcount init 0
		; everything between the label and loop_lt is repeated via the jump
		; back to the label, so the whole region is read within this one pass
loop:
		;asig loscil 1, 1, ifn, 1
		; read the table at raw point indices, advancing one point per sample
		apos lphasor 1
		asig table3 apos, ifn
		; kdx is the write position in icorpusdata; it carries over between frames
		kdx init 0
		kmfcc[], ktrig _mfm_getmfccs asig, ifreqmin, ifreqmax, ifftsize, imfccbands
		if (ktrig == 1) then
			; append the new frame's values to the corpus data table
			kfb = 0
			while (kfb < imfccbands) do
				tabw kmfcc[kfb], kdx, icorpusdata
				kfb += 1
				kdx += 1
			od
		endif
		loop_lt kcount, 1, kcycles, loop
	else
		kdone = 1
	endif
	xout kdone, icorpusdata
endop


/*
 * Play from a corpus sound table, repositioning to the best MFCC match of a driving signal
 *	in:
 *		ain				The driving audio signal
 *		ifn				Table containing the corpus audio
 *		ifndata			Table containing the MFCC corpus data for ifn
 *		kstretch		Scales the read-phasor rate by (1 - kstretch)
 *		ifreqmin=-1		Minimum analysis frequency; -1 uses gimfm_default_freqrange[0]
 *		ifreqmax=-1		Maximum analysis frequency; -1 uses gimfm_default_freqrange[1]
 *		ifftsize=-1		FFT size for MFCC analysis; -1 uses gimfm_default_fftsize
 *		imfccbands=-1	Number of MFCC bands to use; -1 uses gimfm_default_mfccbands
 *		ifnmaxindex=-1	Number of table points of ifn in use; -1 uses ftlen(ifn)
 *	out:
 *		amatched		The sndwarp output read from ifn
 */
opcode mfm_matchplay, a, aiikjjjjj
	ain, ifn, ifndata, kstretch, ifreqmin, ifreqmax, ifftsize, imfccbands, ifnmaxindex xin

	; fall back to the module defaults for any omitted (-1) argument
	ifreqmin = ((ifreqmin == -1) ? gimfm_default_freqrange[0]: ifreqmin)
	ifreqmax = ((ifreqmax == -1) ? gimfm_default_freqrange[1]: ifreqmax)
	ifftsize = ((ifftsize == -1) ? gimfm_default_fftsize : ifftsize)
	imfccbands = ((imfccbands == -1) ? gimfm_default_mfccbands : imfccbands)

	; corpus length in table points and in seconds
	itablen = ((ifnmaxindex == -1) ? ftlen(ifn) : ifnmaxindex)
	itabsr = ftsr(ifn)
	idur = itablen / itabsr
	; phasor rate for one pass through the corpus
	irate = 1/idur
	iwinsize = ifftsize/2

	; sample offset of the best-matching corpus frame, plus a new-match trigger
	kstart, knew _mfm_nearest ain, ifndata, ifreqmin, ifreqmax, ifftsize, imfccbands

	; the phasor is re-synced on every new match
	aphase, asyncout syncphasor irate*(1-kstretch), a(knew)

	; offset the phasor position by the match start and convert it to seconds
	asamp = (aphase * itablen) + kstart
	atime = (asamp / itablen) * idur

	aout sndwarp 0.7, atime, 1, ifn, 0, iwinsize, 64, 4, gifnHalfSine, 1
	;amatched balance amatched, delay(ain, (1/sr)*ifftsize)
	xout aout

endop


#end