DSGenerator/generate.py at main · dssynths/DSGenerator · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
# dependencies for file reading
import json
import sys
import itertools
import numpy as np
import os
import soundfile as sf
import math

import librosa # conda install -c conda-forge librosa

# Make the script's directory available for import.
# NOTE(review): os.path.dirname(__name__) operates on the module *name*
# ("__main__" when this file is run as a script), not on a file path, so it
# yields "" and realpath() resolves that to the current working directory.
# `__file__` was presumably intended -- confirm before changing, because
# loadSoundModels() looks for the sound model in os.getcwd().
script_path = os.path.realpath(os.path.dirname(__name__))
os.chdir(script_path)
sys.path.append(script_path)

from parammanager import paramManager
from nsjsonmanager import nsjson


from genericsynth import synthInterface as SI
from filewrite import fileHandler

import importlib


'''
This code will generate a dataset of textures consisting of pop or drip textures.

The files are generated using 3 (or N) different parameters that are sampled over a range of values. The parameters that are
developed for the sound model are exposed via the config_file.json. The three parameters for a range of sounds
including the pop and drip textures are:
    rate (average events per second),
    irregularity in temporal distribution (using a gaussian distribution around each evenly-spaced time value), and
    the center frequency of bp filter

The generator.py is meant to be independent of any synth, and dependent only on the config file. That is, the same generator.py should work for all DSSynths.
    a) It can get the synth name from the config file, and then import it "dynamically"
    b) It can set any params the user wants to fix (not iterate over)
    c) names in the config file should correspond to names in the synth (right now the generator constructs synth param names from those used in the config file by adding "_exp", etc.)

There is also a "visualizer" notebook that need not generate files at all. The function of the visualizer is to
interactively explore and create textures using synthinterface and sound models.
It is mostly for understanding the synthesizer, and exploring parameters in a way that might help you decide how
you want to specify them in your config file.

The parameter values are each sampled linearly on an exponential scale, and specified in:
rate = 2^r_exp  (so r_exp in [0,4] means the rate ranges from 1 to 16)
irregularity = .04*10^irreg_exp; sd = irregularity/events per second  (so irreg_exp in [0,1] means irregularity ranges from completely regular, to Poisson process)
cf = 440*2^cf_exp  (so cf_exp in [0,1] means cf ranges from 440 to 880, one octave)

Generator use:
For each parameter setting, first a "long" signal (of length longDurationSecs) is generated, and then
it is sliced into segments (called chunks) of a length desired for training.

Example: If each of the 3 parameters is sampled at 5 values, the long signal is 10 seconds and chunkLength is 2 seconds,
then the total amount of audio generated is 5*5*5*10 = 1250 seconds of sound (about 21 minutes; ~40 MB at 16K sr, 16-bit mono).
If each chunk is 2 seconds, then there will be 10/2=5 chunks for each parameter setting, and
5*5*5*5 = 625 files
'''

import argparse

# Module-level state.
myConfig = {}  # not referenced by any of the functions visible in this file
soundModels = {}  # loadSoundModels() stores the imported synth module under the "sound" key
outputpath = ""  # main() and generate() bind their own local `outputpath`; this global is never reassigned by them

def get_arguments():
    """Parse the command line.

    Both ``--configfile`` and ``--outputpath`` are mandatory; argparse exits
    the process with a usage message when either one is missing.

    Returns:
        argparse.Namespace with ``configfile`` and ``outputpath`` attributes.
    """
    cli = argparse.ArgumentParser(description="myParser")
    for flag in ("--configfile", "--outputpath"):
        cli.add_argument(flag, required=True)
    namespace = cli.parse_args()
    return namespace

def selectChunk(sig, sr, chunkNum, chunkSecs):
    """Return the ``chunkNum``-th fixed-length slice of ``sig``.

    Args:
        sig: sliceable signal (anything supporting ``sig[a:b]``).
        sr: sample rate used to convert ``chunkSecs`` into a sample count.
        chunkNum: zero-based index of the chunk to extract.
        chunkSecs: chunk length in seconds; ``sr * chunkSecs`` is floored
            to a whole number of samples.

    Returns:
        ``sig`` sliced from ``chunkNum * n`` to ``(chunkNum + 1) * n`` where
        ``n`` is the floored samples-per-chunk. Normal slicing rules apply,
        so a chunk past the end of ``sig`` comes back short or empty.
    """
    samplesPerChunk = math.floor(sr * chunkSecs)
    first = chunkNum * samplesPerChunk
    return sig[first:first + samplesPerChunk]


def main():
    """Command-line entry point.

    Reads the JSON configuration named by ``--configfile``, loads the sound
    model it refers to, records ``--outputpath`` in the configuration under
    the "outputpath" key and then runs the generator.
    """
    cli_args = get_arguments()
    config_path = cli_args.configfile  # path of the JSON file opened below
    out_dir = cli_args.outputpath

    with open(config_path) as json_file:
        config = json.load(json_file)
    print("Reading parameters for generating ", "red" , config['soundname'], " texture.. ")

    # The synth module must be registered before generate() looks it up.
    loadSoundModels(config)
    config["outputpath"] = out_dir

    generate(config)

def loadSoundModels(MyConfig):
    """Import the sound-model module named in the config and register it.

    The module is loaded from ``<current working directory>/<soundname>.py``
    and stored in the module-level ``soundModels`` dict under the key
    ``"sound"``.

    Args:
        MyConfig: configuration dict; only ``MyConfig["soundname"]`` is read.

    Raises:
        KeyError: if ``"soundname"`` is missing from the configuration.
        FileNotFoundError: if the model file does not exist (raised while the
            module is executed).
    """
    # `import importlib` on its own does not guarantee that the `util`
    # submodule is loaded; import it explicitly so the attribute lookups
    # below cannot fail with AttributeError.
    import importlib.util

    dirpath = os.getcwd()
    modelFile = os.path.join(dirpath, MyConfig["soundname"] + ".py")

    # NOTE(review): the module is registered under the directory path as its
    # name (kept as in the original); the sound name looks like the more
    # natural choice -- confirm nothing relies on mod.__name__ before changing.
    spec = importlib.util.spec_from_file_location(dirpath, modelFile)
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    soundModels["sound"] = mod

def _stageTfRecord(tfr, pfName, chunkSecs, newsig, chnk, paramArr, userP, fixedParams):
    """Hand one chunk (audio plus its parameter values) to the tfrecord manager."""
    tfr.__addFeatureData__(pfName, [0, chunkSecs], newsig, chnk)

    for param, userVal in zip(paramArr, userP):
        tfr.__addParam__(param, userVal)

    for fixed in fixedParams:
        tfr.__addParam__(fixed, fixed["synth_val"])

    tfr.__tfUpdateSize__()


def generate(MyConfig):
    """Synthesize every parameter combination and write audio chunks plus metadata.

    For each of ``examples`` passes and each point of the cartesian grid built
    from ``MyConfig["params"]``, the synth parameters are set,
    ``soundDuration`` seconds of audio are generated and the signal is cut
    into ``numChunks`` equal-length chunks. Every chunk is written as a wav
    file (resampled from ``computeSR`` to ``datafileSR`` when they differ) and
    described according to ``recordFormat``:

    * ``"params"`` or ``0``   -- one parameter file per chunk via paramManager
    * ``"nsjson"`` or ``1``   -- a record in ``nsjson.json`` via nsJson
    * ``"tfrecords"``         -- tfrecords, one per chunk (``tftype ==
      "single"``) or grouped into shards limited by ``shard_size``

    Args:
        MyConfig: configuration dict. Keys read here: ``outputpath``,
            ``params`` (each entry: ``user_minval``, ``user_maxval``,
            ``user_nvals``, ``synth_minval``, ``synth_maxval``,
            ``synth_pname``, optional ``user_doc``), ``fixedParams`` (each
            entry: ``synth_pname``, ``synth_val``, optional ``user_doc``),
            ``numChunks``, ``soundDuration``, ``soundname``, ``computeSR``,
            ``datafileSR``, ``recordFormat``, optional ``rngseed``, optional
            ``examples`` (default 1) and, for tfrecords, ``tftype`` and
            ``shard_size``.

    Side effects:
        Creates ``outputpath`` if needed, writes files into it, and sets
        ``"synth_units"`` to ``"natural"`` on every entry of
        ``MyConfig["params"]`` and ``MyConfig["fixedParams"]``. Exits the
        process if tfrecords are requested but the tfrecord manager cannot be
        imported.
    """
    outputpath = MyConfig["outputpath"]

    if os.path.isdir(outputpath):
        print("Outpath exists")
    else:
        print(outputpath)
        # makedirs instead of mkdir so that nested output paths also work.
        os.makedirs(outputpath, exist_ok=True)

    print("Enumerating parameter combinations..")

    paramArr = MyConfig["params"]
    fixedParams = MyConfig["fixedParams"]

    # Two grids with the same number of steps per parameter: the values
    # reported to the user and the values actually sent to the synth.
    userRange = [
        np.linspace(p["user_minval"], p["user_maxval"], p["user_nvals"], endpoint=True)
        for p in paramArr
    ]
    synthRange = [
        np.linspace(p["synth_minval"], p["synth_maxval"], p["user_nvals"], endpoint=True)
        for p in paramArr
    ]

    userParam = list(itertools.product(*userRange))
    synthParam = list(itertools.product(*synthRange))

    numChunks = MyConfig["numChunks"]
    recordFormat = MyConfig["recordFormat"]

    barsynthclass = getattr(soundModels["sound"], MyConfig["soundname"])

    # Use a sample rate, and an rng seed if one is configured.
    if "rngseed" not in MyConfig:
        barsynth = barsynthclass(sr=MyConfig["computeSR"])
    elif MyConfig["rngseed"] is None:
        # Draw a seed. The bound is a Python int and the dtype is explicit so
        # that 2**32 cannot overflow where NumPy's default integer is 32-bit.
        seed = int(np.random.randint(0, 2**32, dtype=np.int64))
        print("Using user random seed", seed)
        barsynth = barsynthclass(sr=MyConfig["computeSR"], rngseed=seed)
    else:
        print("Using user random seed", MyConfig["rngseed"])
        barsynth = barsynthclass(sr=MyConfig["computeSR"], rngseed=MyConfig["rngseed"])
    print(barsynth)

    # All parameters are set in natural units.
    for params in paramArr:
        params["synth_units"] = "natural"

    for fixparams in fixedParams:
        fixparams["synth_units"] = "natural"
        barsynth.setParam(fixparams["synth_pname"], fixparams["synth_val"])

    sg = nsjson.nsJson("/", outputpath, 1, MyConfig["datafileSR"], MyConfig['soundname'])

    # The tfrecord manager is only needed (and imported) for tfrecords output.
    if recordFormat == "tfrecords":
        try:
            from tfrecordmanager import tfrecordManager
        except ImportError as err:
            print("Please install tfrecords with <pip install -r requirements_tf.txt --src '.'> and run again")
            print(err)
            sys.exit(1)
        # Constructed outside the try block so that a genuine error in the
        # manager is reported as such instead of as a missing installation.
        tfr = tfrecordManager.tfrecordManager()

    # Buffers for aggregate (sharded) tfrecord writing.
    audioSegments = []
    pfnames = []
    soundDurations = []
    segmentNum = []
    usertfP = []
    synthtfP = []

    examples = MyConfig.get("examples", 1)

    # The original code passes an explicit subtype only when a signal is split
    # into several chunks; that distinction is preserved here.
    # NOTE(review): soundfile presumably defaults to PCM_16 for wav files
    # anyway -- confirm, then the special case can be dropped.
    wavOptions = {} if numChunks == 1 else {"subtype": "PCM_16"}

    lastIndex = len(userParam) - 1

    for x in range(examples):
        # Step through the cartesian product of the parameter grids.
        for index, (userP, synthP) in enumerate(zip(userParam, synthParam)):

            for param, synthVal in zip(paramArr, synthP):
                barsynth.setParam(param["synth_pname"], synthVal)

            barsig = barsynth.generate(MyConfig["soundDuration"])
            chunkSecs = MyConfig["soundDuration"] / numChunks

            for chnk in range(numChunks):

                fileHandle = fileHandler()

                # File names carry no chunk number when there is a single chunk.
                nameChunk = None if numChunks == 1 else chnk

                # Write wav
                wavName = fileHandle.makeName(MyConfig["soundname"], paramArr, userP, nameChunk, examples, x)
                wavPath = fileHandle.makeFullPath(outputpath, wavName, ".wav")
                chunkedAudio = selectChunk(barsig, MyConfig["computeSR"], chnk, chunkSecs)

                if MyConfig["computeSR"] != MyConfig["datafileSR"]:
                    newsig = librosa.resample(chunkedAudio, orig_sr=MyConfig["computeSR"], target_sr=MyConfig["datafileSR"])
                else:
                    newsig = chunkedAudio
                sf.write(wavPath, newsig, MyConfig["datafileSR"], **wavOptions)

                # Parameter-file name; the calls are repeated in the original
                # order because fileHandle may keep state between them.
                paramName = fileHandle.makeName(MyConfig["soundname"], paramArr, userP, nameChunk, examples, x)
                pfName = fileHandle.makeFullPath(outputpath, paramName, ".params")

                if recordFormat == "params" or recordFormat == 0:
                    pm = paramManager.paramManager(wavPath, fileHandle.getFullPath())
                    pm.initParamFiles(overwrite=True)

                    # Write parameters and meta-parameters
                    for param, userVal in zip(paramArr, userP):
                        pname = param['synth_pname']
                        pm.addParam(
                            pfName, pname, [0, chunkSecs], [userVal, userVal],
                            units=param['synth_units'], nvals=param['user_nvals'],
                            minval=param['user_minval'], maxval=param['user_maxval'],
                            origUnits=None, origMinval=param['synth_minval'],
                            origMaxval=param['synth_maxval'],
                        )

                        if "user_doc" in param and param["user_doc"] != "":
                            pm.addMetaParam(pfName, pname + "_user_doc", param['user_doc'])

                        pm.addMetaParam(pfName, pname + "_synth_doc", barsynth.getParam(pname, "synth_doc"))

                    # Fixed parameters are recorded as meta-parameters:
                    # " <value>, [<user_doc>, ]<synth_doc>"
                    for fixed in fixedParams:
                        docstr = f" {barsynth.getParam(fixed['synth_pname'])}, "
                        if "user_doc" in fixed and fixed["user_doc"] != "":
                            docstr = docstr + fixed['user_doc'] + ", "
                        docstr = docstr + barsynth.getParam(fixed["synth_pname"], "synth_doc")
                        pm.addMetaParam(pfName, "(FIXED_VAL) " + fixed['synth_pname'], docstr)

                elif recordFormat == "nsjson" or recordFormat == 1:

                    sg.storeSingleRecord(wavName)
                    for param, userVal in zip(paramArr, userP):
                        sg.addParams(wavName, param['synth_pname'], userVal, barsynth.getParam(param['synth_pname']))
                    sg.write2File("nsjson.json")

                elif recordFormat == "tfrecords":

                    _stageTfRecord(tfr, pfName, chunkSecs, newsig, chnk, paramArr, userP, fixedParams)

                    if MyConfig["tftype"] == "single":
                        # One tfrecord per chunk.
                        tfr.__tfwriteOne__(pfName)
                        print("Generated a tfrecord")
                    else:
                        shardFull = not (tfr.__tfRetSize__() < MyConfig["shard_size"])

                        if shardFull:
                            # Write out what has been collected so far, then
                            # start afresh with the current record.
                            print(len(pfnames))
                            tfr.__tfwriteN__(outputpath, pfnames, soundDurations, segmentNum, audioSegments, usertfP, synthtfP, paramArr, fixedParams)
                            audioSegments = []
                            pfnames = []
                            soundDurations = []
                            segmentNum = []
                            usertfP = []
                            synthtfP = []

                        audioSegments.append(newsig)
                        pfnames.append(pfName)
                        soundDurations.append([0, chunkSecs])
                        segmentNum.append(chnk)
                        usertfP.append(userP)
                        # Fix: store this record's synth values (the original
                        # appended the synthtfP list to itself).
                        synthtfP.append(synthP)

                        if shardFull:
                            # Stage the current record again after the write,
                            # as the original does. Might be a problem in the
                            # edge case when each record is as big as the max
                            # tfrecord size.
                            _stageTfRecord(tfr, pfName, chunkSecs, newsig, chnk, paramArr, userP, fixedParams)

                        # Flush whatever is left after the last chunk of the
                        # last parameter combination.
                        # NOTE(review): with examples > 1 this fires at the end
                        # of every pass without clearing the buffers, so
                        # records may be written more than once -- confirm
                        # against tfrecordManager before changing.
                        if index == lastIndex and chnk == (numChunks - 1):
                            print(len(pfnames))
                            tfr.__tfwriteN__(outputpath, pfnames, soundDurations, segmentNum, audioSegments, usertfP, synthtfP, paramArr, fixedParams)

                else:
                    print("Not recognized format")

    # if MyConfig["recordFormat"] == "tfrecords" and MyConfig["tftype"] == "shards":

    #     tfr=tfrecordManager.tfrecordManager()

    #     print("Number of records per shard", len(pfnames))

    #     pfName = fileHandle.makeFullPath(outputpath, "shard"+str(fileid),".params")
    #     outrecord = pfName.split(".params")[0]+'.tfrecord'

    #     tfr.__tfwriteN__(outrecord, pfnames, soundDurations, segmentNum, audioSegments, userParam, synthParam, paramArr, fixedParams, numChunks, beg, end)

    #tfm=tfrecordManager.tfrecordManager(vFilesParam[v], outPath)
    #data,sr = librosa.core.load(outPath + fname + '--v-'+'{:03}'.format(v)+'.wav',sr=16000)
    #print(len(data))
    #tfm.addFeature(vFilesParam[v], 'audio', [0,len(data)], data, units='samples', nvals=len(data), minval=0, maxval=0)
    #for pnum in range(len(paramArr)):
    #   print(pnum)
    #   tfm.addFeature(vFilesParam[v], paramArr[pnum]['pname'], [0,data['soundDuration']], [enumP[pnum], enumP[pnum]], units=paramArr[pnum]['units'], nvals=paramArr[pnum]['nvals'], minval=paramArr[pnum]['minval'], maxval=paramArr[pnum]['maxval'])
    #tfm.writeRecordstoFile()

if __name__ == '__main__':
    main()