led_matrix_control/audio_env.py at main · pgaskell/led_matrix_control · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# audio_env.py
import numpy as np, math
import sounddevice as sd
import numpy as np
from collections import deque

# ─── CONFIG ────────────────────────────────────────────────────────────────
# Audio input format: sample rate in Hz and samples per callback block.
SAMPLERATE = 44100
BLOCKSIZE = 1024

# Frequency ranges in Hz (both ends inclusive) of the two envelope bands,
# and the number of log-spaced bands used for the multi-band levels.
LOW_BAND = (50, 150)
HIGH_BAND = (1000, 5000)
N_BANDS = 24

# Linear scale factors applied to the raw low / high band levels.
LOW_GAIN = 0.1
HIGH_GAIN = 3.0

# Per-envelope "panel" settings, loadable/savable with patches.
#   threshold_db  threshold in dB, applied to the raw band level
#   gain_db       output gain in dB, applied after smoothing
#   attack        attack time constant, seconds
#   release       release time constant, seconds
#   mode          "up", "down", or "updown"
ENV_CONFIG = {
    "envl": {
        "threshold_db": -10,
        "gain_db": 0,
        "attack": 0.005,
        "release": 0.100,
        "mode": "up",
    },
    "envh": {
        "threshold_db": -10,
        "gain_db": 0,
        "attack": 0.005,
        "release": 0.100,
        "mode": "up",
    },
}

# ─── PRECOMPUTE FFT BINS ────────────────────────────────────────────────────
# Frequency of every bin of a BLOCKSIZE-point real FFT.
# NOTE(review): all index arrays below are only meaningful for spectra that
# were computed with exactly BLOCKSIZE points.
freqs = np.fft.rfftfreq(BLOCKSIZE, d=1.0 / SAMPLERATE)

# Bin indices that fall inside each envelope band.
low_bins = np.flatnonzero((LOW_BAND[0] <= freqs) & (freqs <= LOW_BAND[1]))
high_bins = np.flatnonzero((HIGH_BAND[0] <= freqs) & (freqs <= HIGH_BAND[1]))

# Audible range, 20 Hz … Nyquist, cut by N_BANDS + 1 log-spaced edges.
fmin, fmax = 20.0, SAMPLERATE / 2
edges = np.logspace(np.log10(fmin), np.log10(fmax), N_BANDS + 1)

# One index array per band; band i is the half-open range [edges[i], edges[i+1]).
band_bins = [
    np.flatnonzero((lo <= freqs) & (freqs < hi))
    for lo, hi in zip(edges[:-1], edges[1:])
]

# ─── INTERNAL STATE ─────────────────────────────────────────────────────────
_raw_l = _raw_h = 0.0                   # latest raw band levels
_sm_l = _sm_h = 0.0                     # smoothed envelope values
_prev_above_l = _prev_above_h = False   # was the last value above threshold?
_state_l = _state_h = True              # polarity used by the "updown" toggle
_raw_bands = [0.0] * N_BANDS            # latest per-band levels

# ─── AUDIO CALLBACK ─────────────────────────────────────────────────────────

def _audio_cb(indata, frames, time, status):
    """
    Stream callback: buffer the new samples and refresh the raw band levels.

    Parameters
    ----------
    indata : 2-D array; column 0 is taken as the mono signal.
    frames : number of samples in this block (used as the window length).
    time, status : accepted as part of the callback signature; not used here.

    Side effects: appends the samples to `_fft_buffer` and updates the module
    globals `_raw_l`, `_raw_h` and (in place) `_raw_bands`.
    """
    global _raw_l, _raw_h

    samples = indata[:, 0]  # mono

    # 0) Push the raw samples into the rolling buffer for the FFT bands.
    #    The stream is started at import time, before the rolling buffer is
    #    created further down the module, so it may not exist yet during the
    #    very first callbacks; skip buffering rather than raise in that case.
    ring = globals().get("_fft_buffer")
    if ring is not None:
        ring.extend(samples)

    # 1) Window & FFT.  The transform length must be BLOCKSIZE because
    #    low_bins / high_bins / band_bins are indices on the
    #    rfftfreq(BLOCKSIZE) grid; any other length would make every band
    #    read the wrong frequencies.
    windowed = samples * np.hanning(frames)
    spec = np.fft.rfft(windowed, n=BLOCKSIZE)
    mag2 = np.abs(spec) ** 2

    # 2) Legacy low/high bands: gain × sqrt(mean squared magnitude).
    _raw_l = LOW_GAIN * math.sqrt(np.mean(mag2[low_bins])) if low_bins.size else 0.0
    _raw_h = HIGH_GAIN * math.sqrt(np.mean(mag2[high_bins])) if high_bins.size else 0.0

    # 3) Multi-band levels for the VU meter or other patterns; a band that
    #    contains no FFT bin reports 0.0.
    for idx, bins in enumerate(band_bins):
        _raw_bands[idx] = math.sqrt(np.mean(mag2[bins])) if bins.size else 0.0

# Open the input stream and start capturing as soon as this module is imported.
# NOTE(review): the stream is started before the statements further down this
# file have run, so anything the callback needs from below this point may not
# exist yet when the first blocks arrive — confirm this ordering is intended.
_stream = sd.InputStream(
    samplerate=SAMPLERATE,
    blocksize=BLOCKSIZE,
    channels=1,  # single (mono) input channel
    callback=_audio_cb,
)
_stream.start()

# ─── EVALUATE ENVELOPES ────────────────────────────────────────────────────
def evaluate_env():
    """
    Return the current envelope outputs as {'envl': float, 'envh': float}.

    For each envelope the latest raw band level is processed as follows:
      1. threshold: the linear threshold is subtracted, clamped at 0
      2. smoothing: one-pole filter using the attack coefficient while the
         value rises above the smoothed level, the release coefficient
         otherwise
      3. mode: "up" → +value, "down" → -value, anything else ("updown") →
         the sign flips each time the thresholded value goes from 0 to > 0
      4. gain: multiplied by the linear gain

    A non-positive `attack` or `release` means "no smoothing" for that
    direction (the value is followed immediately).

    Side effects: updates the module-level smoothing / toggle state, so each
    call advances the envelopes by one step.

    NOTE(review): the smoothing coefficients assume one call per audio block
    (dt = BLOCKSIZE / SAMPLERATE) — confirm against the caller's rate.
    """
    global _sm_l, _sm_h
    global _prev_above_l, _prev_above_h, _state_l, _state_h

    # time step assumed between two calls; identical for both envelopes
    dt = BLOCKSIZE / SAMPLERATE

    out = {}
    for name, raw in (("envl", _raw_l), ("envh", _raw_h)):
        cfg = ENV_CONFIG[name]
        atk_tc = cfg["attack"]
        rel_tc = cfg["release"]
        mode = cfg["mode"]

        # Smoothing coefficients.  exp(-dt/tc) → 0 as tc → 0, so a zero time
        # constant is treated as coefficient 0 instead of dividing by zero;
        # negative values would give a coefficient > 1 (divergent filter)
        # and are handled the same way.
        alpha_a = math.exp(-dt / atk_tc) if atk_tc > 0 else 0.0
        alpha_r = math.exp(-dt / rel_tc) if rel_tc > 0 else 0.0

        # dB → linear amplitude factors
        thr_lin = 10 ** (cfg["threshold_db"] / 20.0)
        gain_lin = 10 ** (cfg["gain_db"] / 20.0)

        # select the right state vars
        if name == "envl":
            sm, prev_above, state = _sm_l, _prev_above_l, _state_l
        else:
            sm, prev_above, state = _sm_h, _prev_above_h, _state_h

        # threshold
        val = max(0.0, raw - thr_lin)

        # smoothing: attack while rising, release while falling
        alpha = alpha_a if val > sm else alpha_r
        sm = (1 - alpha) * val + alpha * sm

        # mode
        above = val > 0.0
        if mode == "up":
            sig = sm
        elif mode == "down":
            sig = -sm
        else:  # updown: toggle on each new crossing
            if above and not prev_above:
                state = not state
            sig = sm if state else -sm

        # apply gain
        sig *= gain_lin

        # store back
        if name == "envl":
            _sm_l, _prev_above_l, _state_l = sm, above, state
        else:
            _sm_h, _prev_above_h, _state_h = sm, above, state

        out[name] = sig

    return out


# FFT length, in samples.
FFT_SIZE = 2048

# Rolling mono input buffer; keeps at most the newest FFT_SIZE samples.
_fft_buffer = deque(maxlen=FFT_SIZE)


import math
import numpy as np

def evaluate_fft_bands(n_bands=24):
    """
    Return a list of `n_bands` band levels, each in [0.0 .. 1.0].

    Steps:
      • take the contents of _fft_buffer, left-padded with zeros to FFT_SIZE
      • Hann window + rfft → magnitude spectrum, scaled so its peak is ~1
      • split 0 Hz … Nyquist into bands: band 0 covers [0, first non-zero
        FFT bin); the remaining n_bands-1 bands are log-spaced from that bin
        up to Nyquist (the last band includes the Nyquist bin)
      • average the magnitudes inside each band, falling back to the bin
        nearest the band centre when a band contains no FFT bin
      • convert with 10*log10, floor at –30 dB, and map linearly to 0..1

    Raises ValueError if `n_bands` is smaller than 1.
    """
    if n_bands < 1:
        raise ValueError("n_bands must be at least 1")

    # 1) pull & pad the rolling buffer (zeros go in front of the samples)
    data = np.array(_fft_buffer, dtype=float)
    if data.size < FFT_SIZE:
        data = np.pad(data, (FFT_SIZE - data.size, 0), 'constant')

    # 2) window + FFT → magnitude spectrum (0…1)
    window = np.hanning(FFT_SIZE)
    spec = np.abs(np.fft.rfft(data * window))
    spec /= (spec.max() + 1e-12)  # epsilon keeps an all-zero input finite

    # 3) build the n_bands+1 band edges
    freqs = np.fft.rfftfreq(FFT_SIZE, 1.0 / SAMPLERATE)
    fmax = SAMPLERATE / 2.0
    # smallest non-zero FFT bin
    fmin_nz = freqs[1] if freqs.size > 1 else 0.0

    if n_bands == 1:
        # a single band simply spans the whole spectrum
        edges = np.array([0.0, fmax])
    else:
        # n_bands log-spaced points fmin_nz … fmax (both included) plus a
        # leading 0 give exactly n_bands+1 distinct edges.  Appending fmax
        # again after a logspace that already ends at fmax would make the
        # last band the empty interval [fmax, fmax).
        log_edges = np.logspace(math.log10(fmin_nz), math.log10(fmax), n_bands)
        edges = np.concatenate(([0.0], log_edges))
        edges[-1] = fmax  # pin exactly to Nyquist despite logspace round-off

    out = []
    db_floor = -30.0
    last = n_bands - 1

    for i in range(n_bands):
        low_e, high_e = edges[i], edges[i + 1]
        # bands are half-open, except the last one which keeps the Nyquist bin
        upper = (freqs <= high_e) if i == last else (freqs < high_e)
        mask = (freqs >= low_e) & upper

        if mask.any():
            m = float(spec[mask].mean())
        else:
            # fallback: narrow low bands can fall between two FFT bins, so
            # pick the single FFT bin nearest to the band centre
            center_f = (low_e + high_e) / 2.0
            idx = int(np.argmin(np.abs(freqs - center_f)))
            m = float(spec[idx])

        # 4) convert to dB, clamp at the floor, normalize to 0..1
        m_db = 10.0 * math.log10(m + 1e-12)
        m_db = max(db_floor, m_db)
        out.append((m_db - db_floor) / (-db_floor))

    return out