From 8bd739e431ec9a0973f4d05b71290851ff06d681 Mon Sep 17 00:00:00 2001 From: jreus Date: Fri, 11 Oct 2019 18:34:47 +0200 Subject: [PATCH 1/3] added bufferClassify function --- pyAudioAnalysis.egg-info/PKG-INFO | 10 ++ pyAudioAnalysis.egg-info/SOURCES.txt | 19 +++ pyAudioAnalysis.egg-info/dependency_links.txt | 1 + pyAudioAnalysis.egg-info/not-zip-safe | 1 + pyAudioAnalysis.egg-info/top_level.txt | 1 + pyAudioAnalysis/audioTrainTest.py | 112 +++++++++--------- 6 files changed, 90 insertions(+), 54 deletions(-) create mode 100644 pyAudioAnalysis.egg-info/PKG-INFO create mode 100644 pyAudioAnalysis.egg-info/SOURCES.txt create mode 100644 pyAudioAnalysis.egg-info/dependency_links.txt create mode 100644 pyAudioAnalysis.egg-info/not-zip-safe create mode 100644 pyAudioAnalysis.egg-info/top_level.txt diff --git a/pyAudioAnalysis.egg-info/PKG-INFO b/pyAudioAnalysis.egg-info/PKG-INFO new file mode 100644 index 000000000..aad3b337e --- /dev/null +++ b/pyAudioAnalysis.egg-info/PKG-INFO @@ -0,0 +1,10 @@ +Metadata-Version: 1.0 +Name: pyAudioAnalysis +Version: 0.2.5 +Summary: Python Audio Analysis Library: Feature Extraction, Classification, Segmentation and Applications +Home-page: https://github.com/tyiannak/pyAudioAnalysis +Author: Theodoros Giannakopoulos +Author-email: tyiannak@gmail.com +License: Apache License, Version 2.0 +Description: UNKNOWN +Platform: UNKNOWN diff --git a/pyAudioAnalysis.egg-info/SOURCES.txt b/pyAudioAnalysis.egg-info/SOURCES.txt new file mode 100644 index 000000000..82e35372c --- /dev/null +++ b/pyAudioAnalysis.egg-info/SOURCES.txt @@ -0,0 +1,19 @@ +README.md +setup.py +pyAudioAnalysis/__init__.py +pyAudioAnalysis/analyzeMovieSound.py +pyAudioAnalysis/audacityAnnotation2WAVs.py +pyAudioAnalysis/audioAnalysis.py +pyAudioAnalysis/audioAnalysisRecordAlsa.py +pyAudioAnalysis/audioBasicIO.py +pyAudioAnalysis/audioFeatureExtraction.py +pyAudioAnalysis/audioSegmentation.py +pyAudioAnalysis/audioTrainTest.py +pyAudioAnalysis/audioVisualization.py +pyAudioAnalysis/convertToWav.py +pyAudioAnalysis/utilities.py +pyAudioAnalysis.egg-info/PKG-INFO +pyAudioAnalysis.egg-info/SOURCES.txt +pyAudioAnalysis.egg-info/dependency_links.txt +pyAudioAnalysis.egg-info/not-zip-safe +pyAudioAnalysis.egg-info/top_level.txt \ No newline at end of file diff --git a/pyAudioAnalysis.egg-info/dependency_links.txt b/pyAudioAnalysis.egg-info/dependency_links.txt new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/pyAudioAnalysis.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/pyAudioAnalysis.egg-info/not-zip-safe b/pyAudioAnalysis.egg-info/not-zip-safe new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/pyAudioAnalysis.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/pyAudioAnalysis.egg-info/top_level.txt b/pyAudioAnalysis.egg-info/top_level.txt new file mode 100644 index 000000000..b3b85b848 --- /dev/null +++ b/pyAudioAnalysis.egg-info/top_level.txt @@ -0,0 +1 @@ +pyAudioAnalysis diff --git a/pyAudioAnalysis/audioTrainTest.py b/pyAudioAnalysis/audioTrainTest.py index cf20251be..954a81191 100644 --- a/pyAudioAnalysis/audioTrainTest.py +++ b/pyAudioAnalysis/audioTrainTest.py @@ -112,9 +112,9 @@ def randSplitFeatures(features): This function splits a feature set for training and testing. ARGUMENTS: - - features: a list ([numOfClasses x 1]) whose elements + - features: a list ([numOfClasses x 1]) whose elements containt numpy matrices of features. - each matrix features[i] of class i is + each matrix features[i] of class i is [n_samples x numOfDimensions] - per_train: percentage RETURNS: @@ -166,7 +166,7 @@ def trainSVM(features, Cparam): ''' [X, Y] = listOfFeatures2Matrix(features) - svm = sklearn.svm.SVC(C = Cparam, kernel = 'linear', probability = True) + svm = sklearn.svm.SVC(C = Cparam, kernel = 'linear', probability = True) svm.fit(X,Y) return svm @@ -188,7 +188,7 @@ def trainSVM_RBF(features, Cparam): ''' [X, Y] = listOfFeatures2Matrix(features) - svm = sklearn.svm.SVC(C = Cparam, kernel = 'rbf', probability = True) + svm = sklearn.svm.SVC(C = Cparam, kernel = 'rbf', probability = True) svm.fit(X,Y) return svm @@ -263,29 +263,29 @@ def trainExtraTrees(features, n_estimators): return et -def trainSVMregression(Features, Y, Cparam): - svm = sklearn.svm.SVR(C = Cparam, kernel = 'linear') - svm.fit(Features,Y) +def trainSVMregression(Features, Y, Cparam): + svm = sklearn.svm.SVR(C = Cparam, kernel = 'linear') + svm.fit(Features,Y) train_err = numpy.mean(numpy.abs(svm.predict(Features) - Y)) return svm, train_err -def trainSVMregression_rbf(Features, Y, Cparam): - svm = sklearn.svm.SVR(C = Cparam, kernel = 'rbf') - svm.fit(Features,Y) +def trainSVMregression_rbf(Features, Y, Cparam): + svm = sklearn.svm.SVR(C = Cparam, kernel = 'rbf') + svm.fit(Features,Y) train_err = numpy.mean(numpy.abs(svm.predict(Features) - Y)) return svm, train_err -def trainRandomForestRegression(Features, Y, n_estimators): +def trainRandomForestRegression(Features, Y, n_estimators): rf = sklearn.ensemble.RandomForestRegressor(n_estimators = n_estimators) rf.fit(Features,Y) train_err = numpy.mean(numpy.abs(rf.predict(Features) - Y)) return rf, train_err -def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, - classifier_type, model_name, +def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, + classifier_type, model_name, compute_beat=False, perTrain=0.90): ''' This function is used as a wrapper to segment-based audio feature extraction and classifier training. @@ -300,11 +300,11 @@ def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, ''' # STEP A: Feature Extraction: - [features, classNames, _] = aF.dirsWavFeatureExtraction(list_of_dirs, - mt_win, - mt_step, - st_win, - st_step, + [features, classNames, _] = aF.dirsWavFeatureExtraction(list_of_dirs, + mt_win, + mt_step, + st_win, + st_step, compute_beat=compute_beat) if len(features) == 0: @@ -327,15 +327,15 @@ def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, elif classifier_type == "randomforest": classifier_par = numpy.array([10, 25, 50, 100,200,500]) elif classifier_type == "knn": - classifier_par = numpy.array([1, 3, 5, 7, 9, 11, 13, 15]) + classifier_par = numpy.array([1, 3, 5, 7, 9, 11, 13, 15]) elif classifier_type == "gradientboosting": - classifier_par = numpy.array([10, 25, 50, 100,200,500]) + classifier_par = numpy.array([10, 25, 50, 100,200,500]) elif classifier_type == "extratrees": - classifier_par = numpy.array([10, 25, 50, 100,200,500]) + classifier_par = numpy.array([10, 25, 50, 100,200,500]) # get optimal classifeir parameter: features2 = [] - for f in features: + for f in features: fTemp = [] for i in range(f.shape[0]): temp = f[i,:] @@ -358,7 +358,7 @@ def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, # STEP C: Save the classifier to file if classifier_type == "svm": - classifier = trainSVM(featuresNew, bestParam) + classifier = trainSVM(featuresNew, bestParam) elif classifier_type == "svm_rbf": classifier = trainSVM_RBF(featuresNew, bestParam) elif classifier_type == "randomforest": @@ -390,7 +390,7 @@ def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, classifier_type == "gradientboosting" or \ classifier_type == "extratrees": with open(model_name, 'wb') as fid: - cPickle.dump(classifier, fid) + cPickle.dump(classifier, fid) fo = open(model_name + "MEANS", "wb") cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL) @@ -400,7 +400,7 @@ def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, cPickle.dump(st_win, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(st_step, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(compute_beat, fo, protocol=cPickle.HIGHEST_PROTOCOL) - fo.close() + fo.close() def featureAndTrainRegression(dir_name, mt_win, mt_step, st_win, st_step, @@ -451,7 +451,7 @@ def featureAndTrainRegression(dir_name, mt_win, mt_step, st_win, st_step, f_final.append(numpy.array(f_temp)) regression_labels.append(numpy.array(cur_regression_labels)) # cur_regression_labels is the list of values for the current regression problem - regression_names.append(ntpath.basename(c).replace(".csv", "")) # regression task name + regression_names.append(ntpath.basename(c).replace(".csv", "")) # regression task name if len(features) == 0: print("ERROR: No data found in any input folder!") return @@ -461,7 +461,7 @@ def featureAndTrainRegression(dir_name, mt_win, mt_step, st_win, st_step, # TODO: ARRF WRITE???? # STEP B: classifier Evaluation and Parameter Selection: if model_type == "svm" or model_type == "svm_rbf": - model_params = numpy.array([0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0]) + model_params = numpy.array([0.001, 0.005, 0.01, 0.05, 0.1, 0.25, 0.5, 1.0, 5.0, 10.0]) elif model_type == "randomforest": model_params = numpy.array([5, 10, 25, 50, 100]) @@ -474,9 +474,9 @@ def featureAndTrainRegression(dir_name, mt_win, mt_step, st_win, st_step, for iRegression, r in enumerate(regression_names): # get optimal classifeir parameter: print("Regression task " + r) - bestParam, error, berror = evaluateRegression(f_final[iRegression], - regression_labels[iRegression], - 100, model_type, + bestParam, error, berror = evaluateRegression(f_final[iRegression], + regression_labels[iRegression], + 100, model_type, model_params) errors.append(error) errors_base.append(berror) @@ -501,7 +501,7 @@ def featureAndTrainRegression(dir_name, mt_win, mt_step, st_win, st_step, if model_type == "svm" or model_type == "svm_rbf" or model_type == "randomforest": with open(model_name + "_" + r, 'wb') as fid: - cPickle.dump(classifier, fid) + cPickle.dump(classifier, fid) fo = open(model_name + "_" + r + "MEANS", "wb") cPickle.dump(MEAN, fo, protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(STD, fo, protocol=cPickle.HIGHEST_PROTOCOL) @@ -581,7 +581,7 @@ def load_model(model_name, is_regression=False): STD = numpy.array(STD) with open(model_name, 'rb') as fid: - SVM = cPickle.load(fid) + SVM = cPickle.load(fid) if is_regression: return(SVM, MEAN, STD, mt_win, mt_step, st_win, st_step, compute_beat) @@ -748,13 +748,13 @@ def evaluateRegression(features, labels, n_exp, method_name, Params): l_test = [labels[randperm[i + n_train]] for i in range(n_samples - n_train)] - # train multi-class svms: - f_train = numpy.matrix(f_train) - if method_name == "svm": - [classifier, train_err] = trainSVMregression(f_train, + # train multi-class svms: + f_train = numpy.matrix(f_train) + if method_name == "svm": + [classifier, train_err] = trainSVMregression(f_train, l_train, C) - elif method_name == "svm_rbf": + elif method_name == "svm_rbf": [classifier, train_err] = \ trainSVMregression_rbf(f_train, l_train, C) elif method_name == "randomforest": @@ -897,17 +897,9 @@ def pcaDimRed(features, nDims): return (featuresNew, coeff) -def fileClassification(inputFile, model_name, model_type): - # Load classifier: - - if not os.path.isfile(model_name): - print("fileClassification: input model_name not found!") - return (-1, -1, -1) - - if not os.path.isfile(inputFile): - print("fileClassification: wav file not found!") - return (-1, -1, -1) +def bufferClassification(audioBuffer, sampleRate, model_name, model_type): + # Load classifier: if model_type == 'knn': [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step, compute_beat] = load_model_knn(model_name) @@ -915,16 +907,13 @@ def fileClassification(inputFile, model_name, model_type): [classifier, MEAN, STD, classNames, mt_win, mt_step, st_win, st_step, compute_beat] = load_model(model_name) - [Fs, x] = audioBasicIO.readAudioFile(inputFile) # read audio file and convert to mono - x = audioBasicIO.stereo2mono(x) - - if isinstance(x, int): # audio file IO problem + if isinstance(audioBuffer, int): # audio buffer format problem return (-1, -1, -1) - if x.shape[0] / float(Fs) <= mt_win: + if audioBuffer.shape[0] / float(sampleRate) <= mt_win: return (-1, -1, -1) # feature extraction: - [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs, mt_win * Fs, mt_step * Fs, round(Fs * st_win), round(Fs * st_step)) + [mt_features, s, _] = aF.mtFeatureExtraction(audioBuffer, sampleRate, mt_win * sampleRate, mt_step * sampleRate, round(sampleRate * st_win), round(sampleRate * st_step)) mt_features = mt_features.mean(axis=1) # long term averaging of mid-term statistics if compute_beat: [beat, beatConf] = aF.beatExtraction(s, st_step) @@ -932,10 +921,25 @@ def fileClassification(inputFile, model_name, model_type): mt_features = numpy.append(mt_features, beatConf) curFV = (mt_features - MEAN) / STD # normalization - [Result, P] = classifierWrapper(classifier, model_type, curFV) # classification + [Result, P] = classifierWrapper(classifier, model_type, curFV) # classification return Result, P, classNames +def fileClassification(inputFile, model_name, model_type): + if not os.path.isfile(model_name): + print("fileClassification: input model_name not found!") + return (-1, -1, -1) + + if not os.path.isfile(inputFile): + print("fileClassification: wav file not found!") + return (-1, -1, -1) + + [Fs, x] = audioBasicIO.readAudioFile(inputFile) # read audio file and convert to mono + x = audioBasicIO.stereo2mono(x) + + return bufferClassification(x, Fs, model_name, model_type) + + def fileRegression(inputFile, model_name, model_type): # Load classifier: From 3a6e5c581496668be41bae0887ed97867c093ae3 Mon Sep 17 00:00:00 2001 From: jreus Date: Fri, 11 Oct 2019 19:40:29 +0200 Subject: [PATCH 2/3] bugfix to bufferClassify --- pyAudioAnalysis/audioBasicIO.py | 33 +++++++++++++++---------------- pyAudioAnalysis/audioTrainTest.py | 4 ++++ 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/pyAudioAnalysis/audioBasicIO.py b/pyAudioAnalysis/audioBasicIO.py index 3cab5c78e..8410a021d 100644 --- a/pyAudioAnalysis/audioBasicIO.py +++ b/pyAudioAnalysis/audioBasicIO.py @@ -13,26 +13,26 @@ def convertDirMP3ToWav(dirName, Fs, nC, useMp3TagsAsName = False): ''' types = (dirName+os.sep+'*.mp3',) # the tuple of file types - filesToProcess = [] + filesToProcess = [] for files in types: - filesToProcess.extend(glob.glob(files)) + filesToProcess.extend(glob.glob(files)) for f in filesToProcess: #tag.link(f) - audioFile = eyed3.load(f) - if useMp3TagsAsName and audioFile.tag != None: + audioFile = eyed3.load(f) + if useMp3TagsAsName and audioFile.tag != None: artist = audioFile.tag.artist title = audioFile.tag.title if artist!=None and title!=None: if len(title)>0 and len(artist)>0: wavFileName = ntpath.split(f)[0] + os.sep + artist.replace(","," ") + " --- " + title.replace(","," ") + ".wav" else: - wavFileName = f.replace(".mp3",".wav") + wavFileName = f.replace(".mp3",".wav") else: - wavFileName = f.replace(".mp3",".wav") + wavFileName = f.replace(".mp3",".wav") else: - wavFileName = f.replace(".mp3",".wav") + wavFileName = f.replace(".mp3",".wav") command = "avconv -i \"" + f + "\" -ar " +str(Fs) + " -ac " + str(nC) + " \"" + wavFileName + "\""; print(command) os.system(command.decode('unicode_escape').encode('ascii','ignore').replace("\0","")) @@ -50,15 +50,15 @@ def convertFsDirWavToWav(dirName, Fs, nC): filesToProcess = [] for files in types: - filesToProcess.extend(glob.glob(files)) + filesToProcess.extend(glob.glob(files)) newDir = dirName + os.sep + "Fs" + str(Fs) + "_" + "NC"+str(nC) if os.path.exists(newDir) and newDir!=".": - shutil.rmtree(newDir) - os.makedirs(newDir) + shutil.rmtree(newDir) + os.makedirs(newDir) - for f in filesToProcess: - _, wavFileName = ntpath.split(f) + for f in filesToProcess: + _, wavFileName = ntpath.split(f) command = "avconv -i \"" + f + "\" -ar " +str(Fs) + " -ac " + str(nC) + " \"" + newDir + os.sep + wavFileName + "\""; print(command) os.system(command) @@ -78,16 +78,16 @@ def readAudioFile(path): strsig = s.readframes(nframes) x = numpy.fromstring(strsig, numpy.short).byteswap() Fs = s.getframerate() - elif extension.lower() == '.mp3' or extension.lower() == '.wav' or extension.lower() == '.au' or extension.lower() == '.ogg': + elif extension.lower() == '.mp3' or extension.lower() == '.wav' or extension.lower() == '.au' or extension.lower() == '.ogg': try: audiofile = AudioSegment.from_file(path) #except pydub.exceptions.CouldntDecodeError: except: print("Error: file not found or other I/O error. " "(DECODING FAILED)") - return (-1,-1) + return (-1,-1) - if audiofile.sample_width==2: + if audiofile.sample_width==2: data = numpy.fromstring(audiofile._data, numpy.int16) elif audiofile.sample_width==4: data = numpy.fromstring(audiofile._data, numpy.int32) @@ -101,7 +101,7 @@ def readAudioFile(path): else: print("Error in readAudioFile(): Unknown file type!") return (-1,-1) - except IOError: + except IOError: print("Error: file not found or other I/O error.") return (-1,-1) @@ -128,4 +128,3 @@ def stereo2mono(x): return ( (x[:,1] / 2) + (x[:,0] / 2) ) else: return -1 - diff --git a/pyAudioAnalysis/audioTrainTest.py b/pyAudioAnalysis/audioTrainTest.py index 954a81191..fe95f41e8 100644 --- a/pyAudioAnalysis/audioTrainTest.py +++ b/pyAudioAnalysis/audioTrainTest.py @@ -295,6 +295,7 @@ def featureAndTrain(list_of_dirs, mt_win, mt_step, st_win, st_step, st_win, st_step: short-term window and step classifier_type: "svm" or "knn" or "randomforest" or "gradientboosting" or "extratrees" model_name: name of the model to be saved + compute_beat: include beat computation in classifier RETURNS: None. Resulting classifier along with the respective model parameters are saved on files. ''' @@ -908,8 +909,10 @@ def bufferClassification(audioBuffer, sampleRate, model_name, model_type): compute_beat] = load_model(model_name) if isinstance(audioBuffer, int): # audio buffer format problem + print("bufferClassification: bad audio format!") return (-1, -1, -1) if audioBuffer.shape[0] / float(sampleRate) <= mt_win: + print("bufferClassification: too little audio to analyze with medium term window", mt_win) return (-1, -1, -1) # feature extraction: @@ -964,6 +967,7 @@ def fileRegression(inputFile, model_name, model_type): [Fs, x] = audioBasicIO.readAudioFile(inputFile) # read audio file and convert to mono x = audioBasicIO.stereo2mono(x) + # feature extraction: [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs, mt_win * Fs, mt_step * Fs, round(Fs * st_win), round(Fs * st_step)) mt_features = mt_features.mean(axis=1) # long term averaging of mid-term statistics From b75bd83d6e61287f1507cae3296fa71b64828a11 Mon Sep 17 00:00:00 2001 From: jreus Date: Fri, 11 Oct 2019 19:42:39 +0200 Subject: [PATCH 3/3] added bufferRegression --- pyAudioAnalysis/audioTrainTest.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/pyAudioAnalysis/audioTrainTest.py b/pyAudioAnalysis/audioTrainTest.py index fe95f41e8..81a826bd2 100644 --- a/pyAudioAnalysis/audioTrainTest.py +++ b/pyAudioAnalysis/audioTrainTest.py @@ -943,13 +943,8 @@ def fileClassification(inputFile, model_name, model_type): return bufferClassification(x, Fs, model_name, model_type) -def fileRegression(inputFile, model_name, model_type): +def bufferRegression(audioBuffer, sampleRate, model_name, model_type): # Load classifier: - - if not os.path.isfile(inputFile): - print("fileClassification: wav file not found!") - return (-1, -1, -1) - regression_models = glob.glob(model_name + "_*") regression_models2 = [] for r in regression_models: @@ -965,8 +960,8 @@ def fileRegression(inputFile, model_name, model_type): if model_type == 'svm' or model_type == "svm_rbf" or model_type == 'randomforest': [_, _, _, mt_win, mt_step, st_win, st_step, compute_beat] = load_model(regression_models[0], True) - [Fs, x] = audioBasicIO.readAudioFile(inputFile) # read audio file and convert to mono - x = audioBasicIO.stereo2mono(x) + Fs = sampleRate + x = audioBuffer # feature extraction: [mt_features, s, _] = aF.mtFeatureExtraction(x, Fs, mt_win * Fs, mt_step * Fs, round(Fs * st_win), round(Fs * st_step)) @@ -991,6 +986,19 @@ def fileRegression(inputFile, model_name, model_type): return R, regression_names + +def fileRegression(inputFile, model_name, model_type): + + if not os.path.isfile(inputFile): + print("fileClassification: wav file not found!") + return (-1, -1, -1) + + [Fs, x] = audioBasicIO.readAudioFile(inputFile) # read audio file and convert to mono + x = audioBasicIO.stereo2mono(x) + + return bufferRegression(x, Fs, model_name, model_type) + + def lda(data, labels, redDim): # Centre data data -= data.mean(axis=0)