aboutsummaryrefslogtreecommitdiff
path: root/mfcc_match_fltk_demo.csd
blob: 9fc1368456c11fb1678e0229563e4af8ab327400 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
<CsoundSynthesizer>
<CsOptions>
-odac
--m-amps=0
</CsOptions>
<CsInstruments>
; audio sample rate in Hz
sr = 44100
; number of audio samples in each control (k-rate) period
ksmps = 64
; two output channels
nchnls = 2
; amplitudes are expressed relative to a full scale of 1
0dbfs = 1

/*
 *	MFCC matching / concatenative resynthesis example
 *	By Richard Knight 2021
 *
 *	Interactive FLTK interface example
 *	
 *	See README.md for overview and usage details.
 *	
 */

; To use your own sounds, list them here instead of using the directory scan below;
; every entry is analysed and added to the available sounds at startup.
;gSsounds[] fillarray "/path/to/sound1.wav", "/path/to/sound2.wav"

; default is to use any .wav files found in the "sounds" subdirectory
gSsounds[] directory "sounds", ".wav"

; FFT size for MFCC analysis (lower = more CPU).
; Also used in this file to derive the grain/segment time and the sndwarp window size.
gifftsize = 1024

; Number of MFCC bands to use (a power of two, ideally 8, 16 or 32)
gimfccbands = 16




; instrument numbers of the named instruments "updateui" and "player",
; passed to the FL widget opcodes below so that widget callbacks can schedule them
iupdateui nstrnum "updateui"
iplayer nstrnum "player"


/*
 * Strip the directory part of a path: returns everything that follows
 * the final "/" in the input string.
 */
opcode strfilename, S, S
	Spath xin
	ilastslash strrindex Spath, "/"
	Sname strsub Spath, ilastslash + 1
	xout Sname
endop


; the UI: a single 800x500 panel.
; Widgets whose trailing arguments are "iupdateui, 0, 1, N" schedule the "updateui"
; instrument with p4 = N whenever they change, so dependent widgets can be refreshed.
FLpanel "MFCC Matching", 800, 500
	; corpus sound selector (counter over the indices of gSsounds) and a box showing its file name
	gkcorpusindex, gicorpusindex FLcount "Corpus", 0, lenarray(gSsounds)-1, 1, 1, 1, 200, 50, 0, 0, 0, iupdateui, 0, 1, 1	; updateui item 1
	gicorpusbox FLbox strfilename(gSsounds[0]), 1, 1, 18, 300, 50, 0, 100
	FLsetTextSize 24, gicorpusindex
	FLsetFont 2, gicorpusindex

	; driver sound selector and a box showing its file name
	gkdriverindex, gidriverindex FLcount "Driver", 0, lenarray(gSsounds)-1, 1, 1, 1, 200, 50, 600, 0, 0, iupdateui, 0, 1, 2 ; updateui item 2
	gidriverbox FLbox strfilename(gSsounds[0]), 1, 1, 18, 300, 50, 500, 100
	FLsetTextSize 24, gidriverindex
	FLsetFont 2, gidriverindex

	; output mix used by "player": 1 = driver only, 0 = matched output only; starts at 0.5
	gkxfade, gixfade FLslider "Crossfade", 0, 1, 0, 5, -1, 400, 50, 200, 150
	FLsetTextSize 24, gixfade
	FLsetFont 2, gixfade
	FLsetVal_i 0.5, gixfade

	; spectral blur on/off and its time control (the slider is only shown while blur is on)
	gkblur, giblur FLbutton "Blur", 1, 0, 3, 100, 30, 0, 300, 0, iupdateui, 0, 1, 3 ; updateui item 3
	gkblurtime, giblurtime FLslider "Blur time", 0, 1, 0, 5, -1, 300, 30, 100, 300
	
	; proportion of each grain held at full amplitude (read by "segment"); starts at 0.4
	gkenvelope, gienvelope FLslider "Grain envelope", 0.1, 0.9, 0, 5, -1, 400, 20, 0, 350
	FLsetVal_i 0.4, gienvelope

	; grain duration as a multiple of the analysis frame time (read by "player"); starts at 1.2
	gkgrainsize, gigrainsize FLslider "Grain size", 0.01, 3, 0, 5, -1, 400, 20, 0, 400
	FLsetVal_i 1.2, gigrainsize

	; toggles with no callback; "player" reads their values directly
	gkpitchshift, gipitchshift FLbutton "Repitch", 1, 0, 3, 100, 30, 0, 450, -1
	gkmatchamps, gimatchamps FLbutton "Match amps", 1, 0, 3, 100, 30, 200, 450, -1

	; read mode 0 = grains played by the "segment" instrument, 1 = sndwarp (enables the stretch slider)
	gkreadmode, gireadmode FLcount "Read mode", 0, 1, 1, 1, 1, 150, 30, 0, 250, 0, iupdateui, 0, 1, 4 ; updateui item 4
	gkstretch, gistretch FLslider "Stretch", 0, 0.999, 0, 5, -1, 250, 30, 150, 250
	

	; each press schedules "player" (nominal duration 200; "player" sets its own p3)
	gkplay, giplay FLbutton "Play/Stop", 1, 0, 1, 200, 50, 300, 0, 0, iplayer, 0, 200	
	FLsetTextSize 24, giplay
	FLsetColor 40, 140, 40, giplay
	
	; full-panel overlay shown during analysis; hidden by "analyseloop" once all sounds are done
	giloadingbox FLbox "Analysing", 1, 1, 72, 800, 500, 0, 0
	
FLpanelEnd
FLrun

; not shown to begin with ("updateui" shows them when blur / read mode 1 is selected)
FLhide giblurtime
FLhide gistretch




/*
 * Widget callback: refresh the parts of the UI that depend on the control
 * identified by p4 (1 = corpus index, 2 = driver index, 3 = blur, 4 = read mode),
 * then turn this instance off.
 */
instr updateui
	iwhich = p4

	if (iwhich == 1) then
		; corpus selection changed: show the selected file name
		icorpus = i(gkcorpusindex)
		FLsetText strfilename(gSsounds[icorpus]), gicorpusbox

	elseif (iwhich == 2) then
		; driver selection changed: show the selected file name
		idriver = i(gkdriverindex)
		FLsetText strfilename(gSsounds[idriver]), gidriverbox

	elseif (iwhich == 3) then
		; the blur time slider is only visible while blur is switched on
		if (i(gkblur) != 1) then
			FLhide giblurtime
		else
			FLshow giblurtime
		endif

	elseif (iwhich == 4) then
		; read mode 1 uses the stretch slider; any other mode uses the grain controls
		if (i(gkreadmode) != 1) then
			FLhide gistretch
			FLshow gienvelope
			FLshow gigrainsize
		else
			FLshow gistretch
			FLhide gienvelope
			FLhide gigrainsize
		endif
	endif

	turnoff
endin




; The corpus sounds: one slot per entry in gSsounds, filled with an ftable number by "analyseloop"
gicorpuswave[] init lenarray(gSsounds)

; ftables for MFCC corpus data: one slot per entry in gSsounds; the array is only allocated here,
; each table is created by the "analysecorpus" instrument
gicorpus[] init lenarray(gSsounds)

; allow only one playing instance: set to 1 by "player" when it starts and back to 0 when it is stopped
giplaying = 0

; include analysis and matching opcodes
; (getmfccs and nearest are used below but not defined in this file; presumably they come from here)
#include "mfcc_matching.udo"


/*
 * One step of the startup analysis chain. p4 is an index into gSsounds:
 * while it is still inside the array, load that sound into an ftable and
 * schedule "analysecorpus" for it; once it has run past the end, hide the
 * loading overlay instead.
 */
instr analyseloop
	isound = p4
	ilast = lenarray(gSsounds) - 1

	if (isound <= ilast) then

		; wave still to analyse: load it, then pass the same index on for MFCC analysis
		gicorpuswave[isound] ftgen 0, 0, 0, 1, gSsounds[isound], 0, 0, 1
		event_i "i", "analysecorpus", 0, 1, isound
	else

		; everything analysed, show UI accordingly
		FLhide giloadingbox
	endif

	turnoff
endin


/*
 * Analyse the corpus sound in gicorpuswave[p4] and store its MFCC data in a
 * newly created table gicorpus[p4]. The whole sound is stepped through inside
 * a single k-cycle using a label / loop_lt loop; on the following k-cycle the
 * "analyseloop" instrument is invoked again with the next index to continue.
 *
 * p4 = index into gSsounds / gicorpuswave / gicorpus
 */
instr analysecorpus
	index = p4
	; sound duration in seconds (table length divided by the table's sample rate)
	ilen = ftlen(gicorpuswave[index]) / ftsr(gicorpuswave[index])
	; storage size: gimfccbands values for every gifftsize samples of the sound
	imaxitems = gimfccbands * (ftlen(gicorpuswave[index]) / gifftsize)
	; destination table for the MFCC data (negative size requests that exact length)
	gicorpus[index] ftgen 0, 0, -imaxitems, 2, 0
	ktimek timeinstk
	; all analysis work happens while ktimek is 1; any other cycle takes the else branch
	if (ktimek == 1) then
		; number of loop passes: one per control period of the sound's duration
		kcycles = ilen*kr
		kcount init 0
loop:
		; each pass produces the next block of the sound
		asig loscil 1, 1, gicorpuswave[index], 1
		; kdx is the write position in gicorpus[index]; it persists across passes
		kdx init 0
		; getmfccs is defined outside this file; as used here it yields an array of band values and a trigger
		kmfcc[], ktrig getmfccs asig, gifftsize, gimfccbands
		if (ktrig == 1) then
			; append the gimfccbands values of this frame to the corpus table
			kfb = 0
			while (kfb < gimfccbands) do
				tabw kmfcc[kfb], kdx, gicorpus[index]
				kfb += 1
				kdx += 1
			od
		endif
		; increment kcount and jump back to "loop" while it is below kcycles
		loop_lt kcount, 1, kcycles, loop
	else
		; analysis finished: move on to the next sound and end this instance
		schedkwhen 1, 1, 1, "analyseloop", 0, 1, index+1
		turnoff
	endif
endin


/*
 * Play the driver sound, obtain the nearest matching index of the corpus sound, and
 * then play the corpus from that index, either as grains with the "segment"
 * instrument (read mode 0) or with sndwarp (read mode 1).
 * The delayed driver and the matched signal are blended by the crossfader for
 * audible comparison, and the same mix is written to both output channels.
 *
 * Scheduled by the Play/Stop button. If an instance is already playing, the new
 * instance resets the button colour and turns all "player" instances off;
 * otherwise it starts playback for the duration of the driver sound.
 */
instr player
	; if playing, turn off, otherwise play
	if (giplaying == 1) then

		; set play/stop button colour to green and turn off
		FLsetColor 40, 140, 40, giplay
		giplaying = 0
		turnoff2 p1, 0, 0
	else

		; set play/stop button colour to red
		FLsetColor 140, 40, 40, giplay
		giplaying = 1

		; run for exactly the length of the selected driver sound
		idriverfn = gicorpuswave[i(gkdriverindex)]
		iduration = ftlen(idriverfn) / ftsr(idriverfn)
		p3 = iduration

		; after duration, call same instrument to set giplaying to 0
		ktime timeinsts
		if (ktime >= iduration) then
			event "i", "player", 0, 0.1
		endif

		; read the driving sound
		adriver loscil 1, 1, idriverfn, 1

		; do the actual matching to find the best sample point
		; (nearest is defined outside this file; as used here it yields a corpus index and a trigger)
		kdx, ktrig nearest adriver, gifftsize, gimfccbands, gicorpus[i(gkcorpusindex)]

		; segment time according to the MFCC analysis FFT size
		isegmenttime = (1/sr) * gifftsize
		ksegmenttime = isegmenttime * gkgrainsize

		if (gkreadmode == 0) then

			; each grain is an instrument call that chnmixes to "segments"
			schedkwhen ktrig, 0, 0, "segment", random:k(0, isegmenttime), ksegmenttime, gicorpuswave[i(gkcorpusindex)], kdx
			amatched chnget "segments"
			chnclear "segments"
		else

			; grains are read using sndwarp
			ilen = ftlen(gicorpuswave[i(gkcorpusindex)])
			icsr = ftsr(gicorpuswave[i(gkcorpusindex)])
			icduration = ilen / icsr
			icps = 1/(ilen/icsr)
			; phasor restarts on every match trigger; stretch slows its advance
			aphs, a_ syncphasor icps*(1-gkstretch), a(ktrig)
			; read position in seconds: phasor progress offset by the matched index
			apos = (((aphs * ilen) + kdx) / ilen) * icduration
			amatched sndwarp 0.7, apos, 1, gicorpuswave[i(gkcorpusindex)], 0, gifftsize/2, 64, 4, 99, 1
		endif

		; delay to account for matching: one analysis frame (gifftsize samples).
		; vdelay expects both the delay time and the maximum delay in milliseconds,
		; so convert from samples to ms and leave headroom in the maximum.
		imatchdelms = (1000/sr) * gifftsize
		adriver vdelay adriver, imatchdelms, imatchdelms + 1

		; if pvs modifications are required
		if (gkblur == 1 || gkpitchshift == 1) then
			ir = 1024
			; length of one pvs analysis frame in milliseconds, for vdelay
			ipvsdelms = (1000/sr) * ir
			fsegments pvsanal amatched, ir, ir/4, ir, 1

			if (gkpitchshift == 1) then

				; additional pitch matching; rough at the moment
				fsource pvsanal adriver, ir, ir/4, ir, 1
				kfrsrc, kasrc pvspitch fsource, 0.1
				kfrseg, kaseg pvspitch fsegments, 0.1
				kpscale init 1

				; keep the previous ratio when no pitch is detected in the matched signal
				if (kfrseg != 0) then
					kpscale = kfrsrc / kfrseg
				endif

				fpitchmatched pvscale fsegments, kpscale


				if (gkblur == 1) then
					fouts pvsblur fpitchmatched, (gkblurtime*ksegmenttime*2)+(ksegmenttime*0.5), isegmenttime*6
				else
					fouts = fpitchmatched
				endif

				; NOTE(review): the driver is not given the extra pvs-frame delay in this
				; branch, unlike the blur-only branch below - confirm whether that is intended

			elseif (gkblur == 1) then
				fouts pvsblur fsegments, (gkblurtime*ksegmenttime*2)+(ksegmenttime*0.5), isegmenttime*6
				; extra driver delay of one pvs frame, in milliseconds
				adriver vdelay adriver, ipvsdelms, ipvsdelms + 1
			endif
			amatched pvsynth fouts
			;asegments balance asegmentstemp, asegments
		endif

		; optionally make the matched signal follow the driver's level
		if (gkmatchamps == 1) then
			amatched balance amatched, adriver
		endif
		; output with crossfader
		aout = (adriver*gkxfade) + (amatched*(1-gkxfade))
		outs aout, aout
	endif
endin

/*
 * Play one sound grain from a table with basic enveloping and mix it into
 * the "segments" channel.
 *
 * p3 = grain duration
 * p4 = source ftable number
 * p5 = start index into the table, in samples
 *
 * The envelope is a linear fade in, a hold and a linear fade out; the hold
 * takes the fraction of p3 given by the "Grain envelope" slider and the two
 * fades share the remainder equally.
 */
instr segment
	imidpoint = i(gkenvelope)
	ifadepoint = (1-imidpoint)*0.5
	iamp = 1
	ifn = p4
	il = ftlen(ifn)

	; Step through the table at the table's own sample rate so that sounds
	; recorded at a rate other than sr keep their pitch. ftsr gives 0 for
	; tables without sample-rate information; fall back to sr in that case.
	itabsr = ftsr(ifn)
	itabsr = (itabsr > 0 ? itabsr : sr)
	isec = il/itabsr
	ist = p5
	; one phasor cycle spans the whole table in isec seconds
	icps = 1/isec
	aphs phasor icps
	andx = aphs * il
	; read with interpolation, offset by the requested start index
	aout tablei andx+ist, ifn
	aout *= linseg:a(0, p3*ifadepoint, iamp, p3*imidpoint, iamp, p3*ifadepoint, 0)
	chnmix aout, "segments"
endin



</CsInstruments>
<CsScore>
; table 99: passed to sndwarp in "player" (its ifn2 window argument)
f99 0 512 9 0.5 1 0 ; half sine
; dummy f0 statement at time 3600 (presumably keeps the performance, and so the UI, running that long - confirm)
f0 3600
; start the analysis chain with the first sound (p4 = index 0)
i"analyseloop"  0 1 0
</CsScore>
</CsoundSynthesizer>