Bach-NoteDetector/bach.py at master · Krishanu230/Bach-NoteDetector · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
from pydub import AudioSegment
import matplotlib.pyplot as plt
import numpy as np
import sys
from scipy.io.wavfile import read
import argparse
import configparser

# Command-line interface: the audio file to analyse and an optional INI file
# whose [DEFAULT] section overrides the tweakable parameters.
parser = argparse.ArgumentParser()
config = configparser.ConfigParser()
parser.add_argument("-af", "--audioFile", help="Audio File Path")
parser.add_argument("-c", "--config", help="the path of the config file")
args = parser.parse_args()
fileName = args.audioFile
# ConfigParser.read() iterates over its argument unless it is a single path,
# so passing None (no --config given) raises TypeError. Skip the read in that
# case: the [DEFAULT] section always exists and every parameter has a
# built-in fallback value.
if args.config is not None:
    config.read(args.config)
c = config['DEFAULT']

# Tweakable parameters. Each value is taken from the config section `c` when
# present there; otherwise the fallback written here is used.

# -- onset detection --
# Minimum block loudness (dBFS) for a block to be considered as an onset.
MIN_VOL = float(c.get('MIN_VOL', fallback=-30))
# Minimum rise in volume relative to the previous block to count as a peak.
MIN_DEL = float(c.get('MIN_DEL', fallback=1.5))
# After a confirmed peak, ignore any further fluctuation inside this window (ms).
MIN_GAP_MS = int(c.get('MIN_GAP_MS', fallback=200))
# Length (ms) of the blocks the audio is cut into for per-block volume.
SEGMENT_MS = int(c.get('SEGMENT_MS', fallback=50))

# -- pitch detection: range of note numbers to search --
MIN_FREQ_NUMBER = int(c.get('MIN_FREQ_NUMBER', fallback=21))   # default: A0
MAX_FREQ_NUMBER = int(c.get('MAX_FREQ_NUMBER', fallback=108))  # default: C8
# Pitch-class names, indexed by (note number mod 12).
NOTE_NAMES = 'C C# D D# E F F# G G# A A# B'.split()


# Inspired From: https://newt.phys.unsw.edu.au/jw/notes.html
def freq_to_number(f):
    """Convert a frequency in Hz to a (fractional) note number.

    The numbering places 440 Hz at 69 and advances by one per semitone.
    ``f`` must be positive: ``np.log2`` yields ``-inf`` for 0.
    """
    return 69 + 12*np.log2(f/440.0)


def number_to_freq(n):
    """Convert a note number to its frequency in Hz (69 -> 440 Hz)."""
    return 440 * 2.0**((n-69)/12.0)


def note_name(n):
    """Return the name of integer note number ``n``, e.g. 69 -> 'A4'.

    The octave is computed with floor division. Truncating ``n/12 - 1``
    toward zero mislabels every note below 12 (e.g. 11 would become 'B0'
    instead of 'B-1').
    """
    return NOTE_NAMES[n % 12] + str(n // 12 - 1)


#def note_to_fftbin(n): return number_to_freq(n)/FREQ_STEP
#imin = max(0, int(np.floor(note_to_fftbin(NOTE_MIN-1))))
#imax = min(SAMPLES_PER_FFT, int(np.ceil(note_to_fftbin(NOTE_MAX+1))))

#find onset by finding peaks in volume of the sample
#Could be improved by more noise reduction and curve smoothing
#But the fundamental problem is it will not detect all peaks.
#We can tune the variables like MIN_GAP_MS etc for a particular
#recording but it will fail to give all peaks.
#TODO: implement a better way of finding note onset by using
#fft to find peaks in the frequency domain not time.
def findOnsetByVolume(volume, minVol=None, minDel=None,
                      segmentMs=None, minGapMs=None):
    """Find note onsets as sudden rises in a per-block volume curve.

    A block is reported as an onset when it is louder than ``minVol``, its
    volume exceeds the previous block's by more than ``minDel``, and it lies
    at least ``minGapMs`` after the previously reported onset. The first
    block has no predecessor and is never reported.

    Args:
        volume: sliceable sequence of loudness values, one per block.
        minVol: loudness threshold; defaults to MIN_VOL.
        minDel: minimum rise over the previous block; defaults to MIN_DEL.
        segmentMs: duration of one block in ms; defaults to SEGMENT_MS.
        minGapMs: minimum spacing between onsets in ms; defaults to
            MIN_GAP_MS.

    Returns:
        List of onset times in milliseconds, in ascending order.
    """
    # Resolve defaults at call time so the module constants remain the
    # single source of truth when no override is passed.
    minVol = MIN_VOL if minVol is None else minVol
    minDel = MIN_DEL if minDel is None else minDel
    segmentMs = SEGMENT_MS if segmentMs is None else segmentMs
    minGapMs = MIN_GAP_MS if minGapMs is None else minGapMs

    onsets = []
    for i, (previous, current) in enumerate(zip(volume, volume[1:]), start=1):
        if current > minVol and (current - previous) > minDel:
            ms = i * segmentMs
            # Ignore any peaks within the minimum gap window after one
            # confirmed peak.
            if not onsets or ms - onsets[-1] >= minGapMs:
                onsets.append(ms)
    return onsets

def identifyNote(audioFrame, sampleRate):
    """Estimate the dominant pitch of one frame of audio samples.

    The frame is Hann-windowed and transformed with a real FFT; the bin with
    the largest magnitude inside the note range
    [MIN_FREQ_NUMBER-1, MAX_FREQ_NUMBER+1] is taken as the pitch.

    Args:
        audioFrame: 1-D sequence of samples for a single channel.
        sampleRate: sampling rate of ``audioFrame`` in Hz.

    Returns:
        Tuple ``(freq, n0)``: the peak frequency in Hz and the nearest
        integer note number.

    Raises:
        ValueError: if the frame is empty, or too short for any non-DC FFT
            bin to fall inside the configured note range.
    """
    frameSize = len(audioFrame)
    if frameSize == 0:
        raise ValueError("audioFrame is empty")
    framesPerFFT = 1   #number of frames to take avg in fft
    samplesPerFFT = frameSize*framesPerFFT
    freqStep = float(sampleRate)/samplesPerFFT

    hanningWindow = 0.5 * (1 - np.cos(np.linspace(0, 2*np.pi, samplesPerFFT, False)))
    buf = np.zeros(samplesPerFFT, dtype=np.float32)
    buf[-frameSize:] = audioFrame

    # Magnitude spectrum of the windowed buffer (samplesPerFFT//2 + 1 bins).
    spectrum = np.abs(np.fft.rfft(buf * hanningWindow))

    # Bin 0 is the DC component: it maps to 0 Hz, for which the note number
    # is -inf and cannot be rounded to an int. Never search below bin 1, and
    # never beyond the bins rfft actually produced.
    imin = max(1, int(np.floor(number_to_freq(MIN_FREQ_NUMBER-1)/freqStep)))
    imax = min(len(spectrum), int(np.ceil(number_to_freq(MAX_FREQ_NUMBER+1)/freqStep)))
    if imax <= imin:
        raise ValueError(
            "audioFrame is too short to resolve the configured note range")

    # Get frequency of maximum response in range
    freq = (spectrum[imin:imax].argmax() + imin) * freqStep

    # Get note number and nearest note
    n = freq_to_number(freq)
    n0 = int(round(n))
    return freq, n0

def main():
    """Detect note onsets in the input file, name each note, and plot.

    Prints the number of detected onsets, the onset times in ms and the list
    of note names, then shows the volume curve with a vertical line at every
    onset.
    """
    # Onset detection works on the per-block loudness of the filtered audio.
    segment = AudioSegment.from_file(fileName).high_pass_filter(100)
    volume = [block.dBFS for block in segment[::SEGMENT_MS]]
    onsets = findOnsetByVolume(volume)
    #actual_notes = [1.3, 1.75, 2.06, 2.4, 2.755, 3.04, 4.2, 4.5, 4.9, 5.1, 5.4, 5.8, 6.9, 7.3, 7.6, 7.89, 8.2, 8.5, 8.9, 9.29, 9.76, 10.09, 10.4, 10.8, 10.9, 11.4, 11.7, 12.29]
    print("Total Number of detected Notes: {:^5}".format(len(onsets)))
    print(onsets)

    # Pitch detection works on the raw samples read by scipy.
    sampleRate, samples = read(fileName)
    # Multi-channel data is 2-D (samples x channels): keep the first channel.
    # Mono data is already 1-D and must not be indexed with two axes.
    if samples.ndim > 1:
        samples = samples[:, 0]

    # Each note spans from its onset to the next onset; the last note runs
    # to the end of the recording (inclusive of the final sample).
    samplesPerMs = sampleRate/1000
    bounds = [int(ms*samplesPerMs) for ms in onsets] + [len(samples)]
    notes = []
    for start, end in zip(bounds, bounds[1:]):
        _freq, n = identifyNote(samples[start:end], sampleRate)
        notes.append(note_name(n))
        #print("i: {:^4} freq: {:>5} num: {:>5}".format(i,f, note_name(n)))

    print(notes)
    #for s in actual_notes:
        #plt.axvline(x=s, color='r', linewidth=0.5, linestyle="-")
    for ms in onsets:
        plt.axvline(x=(ms/1000), color='r', linewidth=0.5, linestyle="-")
    x_axis = np.arange(len(volume)) * (SEGMENT_MS / 1000)
    plt.plot(x_axis, volume)
    plt.grid(True)
    plt.show()
    #plt.savefig("OnsetDetection")


if __name__ == "__main__":
    main()