Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
0a1364d
add script to create 'loud' segments
kgarwoodsdzwa Jun 6, 2025
2f3bb93
display rms and mel spectrogram together
kgarwoodsdzwa Jun 6, 2025
b1d43ab
add tools to run multiple files
kgarwoodsdzwa Aug 20, 2025
325201f
add outfile and fix filenames
kgarwoodsdzwa Aug 25, 2025
7c765fd
added docstring fix pylint
kgarwoodsdzwa Aug 25, 2025
e7469ff
add yaml file example
kgarwoodsdzwa Aug 25, 2025
7130c22
fixed linting and a logic error
kgarwoodsdzwa Aug 25, 2025
c728674
change the function name from main
kgarwoodsdzwa Aug 25, 2025
b526ad5
fix function name
kgarwoodsdzwa Aug 25, 2025
8152286
Merge pull request #77 from conservationtechlab/dev
kgarwoodsdzwa Aug 25, 2025
5c836db
reorganize with updated dev to make it be in the whoot package
kgarwoodsdzwa Aug 25, 2025
f243172
add some hard coded variables to config
kgarwoodsdzwa Aug 25, 2025
b2881aa
lint
kgarwoodsdzwa Aug 25, 2025
6a01724
remove unneeded line
kgarwoodsdzwa Aug 25, 2025
ef45a84
fix the script because import wasnt wrking
kgarwoodsdzwa Mar 3, 2026
5c42acd
adding descriptions to config values
kgarwoodsdzwa Mar 30, 2026
fdb7180
add total clips saved and make error more specific
kgarwoodsdzwa Mar 30, 2026
3b7e515
add raises to docstring for exception
kgarwoodsdzwa Mar 30, 2026
a294966
add better description
kgarwoodsdzwa Mar 30, 2026
e5e965a
disable pylint too many locals and unused e
kgarwoodsdzwa Mar 30, 2026
817cbca
flake8
kgarwoodsdzwa Mar 30, 2026
956bf74
fix pylint
kgarwoodsdzwa Mar 30, 2026
7ff40d0
add description for extract_noise
kgarwoodsdzwa Mar 30, 2026
d9c0363
save out plot
kgarwoodsdzwa Mar 30, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions cfgs/extract_noise_example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# path to audio file directory
audio: /path/to/audio/file/dir/
# folder for resulting clipped segments
out: /path/to/output/folder/
# samples of audio per frame to analyze
frame_length: 4096
# hop size: number of samples between the starts of consecutive frames; should not be larger than frame_length
hop_length: 2048
# how many seconds the result clip will be
num_sec_slice: 3
19 changes: 19 additions & 0 deletions tools/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Tools for handling unlabeled raw audio.

Use these tools to investigate and understand your raw audio data
better, and to isolate potentially significant acoustic events so
that less time is spent labeling.

run_extract_noise.py will generate short clips (length set by
num_sec_slice in the config; 3 s in the example config) from larger
wav files wherever the RMS of a stretch of frames reaches 1.5x the
average RMS of the entire file. This can highlight loud events in an
audio file.

extract_noise.py contains the functions used in run_extract_noise.py.
These functions include clip_loud_segments, which stores clips
of the configured length around each stretch of frames that the
find_peaks function flags as reaching 1.5x the average RMS of the
entire file.

display_rms_and_mel.py will give a visual graph with the mel
spectrogram and RMS chart for a given wav for a sanity check
and to get a better idea of what the spectrogram looks like for
a given RMS peak.
54 changes: 54 additions & 0 deletions tools/display_rms_and_mel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Display RMS and Mel-Spectrogram

For a given audio file, you can visualize the RMS and
the associated Mel-Spectrogram with the same time-step to
see how they relate. Replace the filename variable with the
path to your specific audio file.

Usage:
python3 display_rms_and_mel.py
"""
from pathlib import Path
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np


# Path of the wav file to visualise; edit this before running.
FILENAME = '<path/to/audio/file.wav>'
# Analysis window size in samples, used for both the RMS and the STFT.
FRAME_LENGTH = 2048
# Number of samples between the starts of consecutive frames.
HOP_LENGTH = 512
# NOTE(review): not referenced anywhere in this script.
NUM_SECONDS_OF_SLICE = 3
# When true, the figure is also written to "<audio stem>_RMS_plot.png".
SAVE_PLOT = True

# sr=None keeps the file's native sample rate instead of resampling, so
# every later call that converts frames to seconds must be given `sr`.
sound, sr = librosa.load(FILENAME, sr=None)

clip_rms = librosa.feature.rms(y=sound,
                               frame_length=FRAME_LENGTH,
                               hop_length=HOP_LENGTH)

clip_rms = clip_rms.squeeze()
peak_rms_index = clip_rms.argmax()
print(f"Peak RMS index: {peak_rms_index}")
# Convert the loudest frame's index into a sample index.
peak_index = peak_rms_index * HOP_LENGTH + int(FRAME_LENGTH/2)
print(f"Peak index: {peak_index}")

# Use the configured window and hop for the spectrogram as well, so the
# RMS curve and the spectrogram share the same frame grid.
S, _ = librosa.magphase(librosa.stft(sound,
                                     n_fft=FRAME_LENGTH,
                                     hop_length=HOP_LENGTH))
rms = librosa.feature.rms(S=S,
                          frame_length=FRAME_LENGTH,
                          hop_length=HOP_LENGTH)
fig, ax = plt.subplots(nrows=2, sharex=True)
# Without sr/hop_length librosa assumes 22050 Hz and a hop of 512, which
# mislabels the time axis for audio recorded at any other sample rate.
times = librosa.times_like(rms, sr=sr, hop_length=HOP_LENGTH)
ax[0].semilogy(times, rms[0], label='RMS Energy')
ax[0].set(xticks=[])
ax[0].legend()
ax[0].label_outer()
librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
                         sr=sr, hop_length=HOP_LENGTH,
                         y_axis='log', x_axis='time', ax=ax[1])
ax[1].set(title='log Power spectrogram')

if SAVE_PLOT:
    name = Path(FILENAME).stem
    plot_name = name + "_RMS_plot.png"
    # Save before plt.show(): the figure may be discarded once the
    # interactive window is closed.
    fig.savefig(plot_name)
    print(f"Saved figure to {plot_name}")

plt.show()
123 changes: 123 additions & 0 deletions tools/extract_noise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""Extract noisy segments from a wav file.

Takes in a wav file and a config giving the output folder, and
stores short segments (length set by num_sec_slice; 3 seconds in
the example config) around stretches whose RMS reaches 1.5x the
average RMS for that wav file.
"""
import os
import librosa
import librosa.display
import numpy as np
import soundfile as sf
import audioread


def _loud_runs(flags):
    """Yield ``(start, stop)`` frame indices for each run of flagged frames.

    ``stop`` is exclusive. A run that is still open when ``flags`` ends
    is closed at ``len(flags)``.
    """
    start = None
    for index, value in enumerate(flags):
        if value == 1:
            if start is None:
                start = index
        elif start is not None:
            yield start, index
            start = None
    if start is not None:
        yield start, len(flags)


def clip_loud_segments(file, config):  # pylint: disable=too-many-locals
    """Extract loud segments from a wav file.

    Every run of consecutive frames flagged by ``find_peaks`` (RMS at
    least 1.5x the file's mean RMS) produces one clip centred on the
    middle of the run. A clip is skipped when it would overlap the
    previously written clip, so saved clips never overlap.

    Args:
        file (str): The path of the current wav file.
        config (dict): Settings read from the yaml config. Uses the
            keys 'frame_length' and 'hop_length' (in samples),
            'num_sec_slice' (clip length in seconds) and 'out'
            (folder the clips are written to).

    Returns:
        int: Number of clips written.
        None: Only if the audio file could not be decoded; the
            audioread NoBackendError is caught here and reported
            with a printed message so the caller's loop can go on
            to the next file.
    """
    frame_length = config['frame_length']
    hop_length = config['hop_length']
    num_sec_slice = config['num_sec_slice']
    try:
        # sr=None keeps the native sample rate of the recording.
        sound, sr = librosa.load(file, sr=None)
    except audioread.exceptions.NoBackendError:
        print(f"skipping {file}, corrupt? Or wrong format.")
        return None
    print(f"sample rate: {sr}")

    above_avg_rms = find_peaks(frame_length, hop_length, sound)

    # splitext removes only the extension; str.strip('.wav') would eat
    # any leading/trailing '.', 'w', 'a' or 'v' characters of the name.
    stem = os.path.splitext(os.path.basename(file))[0]
    half_slice_width = int(num_sec_slice * sr / 2)
    # Start below zero so a clip beginning at sample 0 is not rejected.
    last_right_index = -1
    number_clips_saved = 0

    for start_index, stop_index in _loud_runs(above_avg_rms):
        mid_index = start_index + (stop_index - start_index) // 2
        # Frame index -> sample index.
        real_index = mid_index * hop_length + int(frame_length/2)
        left_index = max(0, real_index - half_slice_width)
        # left index needs to be greater than the last right
        if left_index <= last_right_index:
            continue
        right_index = real_index + half_slice_width
        last_right_index = right_index + 1
        # The suffix is the frame index at which the loud run ended.
        name = os.path.join(config['out'], f"{stem}_{stop_index}.wav")
        # Slicing past the end of `sound` simply yields a shorter clip.
        sf.write(name, sound[left_index:right_index], sr)
        print(f"created {name}")
        number_clips_saved += 1
    return number_clips_saved


def find_peaks(frame_length, hop_length, sound):
    """Flag the frames whose RMS reaches 1.5x the mean RMS of the audio.

    Args:
        frame_length (int): Window size in samples.
        hop_length (int): Number of samples between the starts of
            consecutive frames.
        sound (numpy.ndarray): The audio as a time series array.

    Returns:
        numpy.ndarray: One entry per frame, 1 where that frame's RMS
            is at least 1.5x the mean RMS over all frames and 0
            otherwise.
    """
    clip_rms = librosa.feature.rms(y=sound,
                                   frame_length=frame_length,
                                   hop_length=hop_length)

    # reshape(-1) instead of squeeze(): squeeze() collapses a
    # single-frame result to a 0-d array, which cannot be iterated.
    clip_rms = clip_rms.reshape(-1)
    threshold = np.mean(clip_rms) * (3/2)
    # Build a fresh array rather than overwriting the RMS values in
    # place; the dtype is kept so the printed count looks the same.
    above_avg_rms = np.where(threshold > clip_rms, 0, 1).astype(
        clip_rms.dtype)

    num_frames = np.sum(above_avg_rms)
    print(f"num frames with above the 1.5x average rms value: {num_frames}")

    return above_avg_rms
38 changes: 38 additions & 0 deletions tools/run_extract_noise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""Create segments of noisy audio from wavs.

This script uses clip_loud_segments from extract_noise to
calculate the average RMS of a given wav file, and then
creates short segments (3 seconds in the example config)
where the RMS reached 1.5x that average. This main script
walks a directory and sends each file through the function.
cfgs/extract_noise_example.yaml is an example of the config
file needed; copy it and fill it out before running the script.

Usage:

python3 run_extract_noise.py
-config /path/to/extract_noise_copy.yaml

"""
import argparse
import os
import yaml
from extract_noise import clip_loud_segments


# Script entry point: read the yaml config, then run clip_loud_segments
# on every regular file in the configured audio directory.
if __name__ == "__main__":
    PARSER = argparse.ArgumentParser(
        description='Path to config file.'
    )
    # required=True gives a clear usage error instead of open(None)
    # failing with a TypeError when the flag is left out.
    PARSER.add_argument('-config', type=str, required=True,
                        help='Path to config.')
    ARGS = PARSER.parse_args()
    with open(ARGS.config, 'r', encoding='UTF-8') as f:
        config = yaml.safe_load(f)
    # Make sure the clip folder exists before anything is written to it.
    os.makedirs(config['out'], exist_ok=True)
    # Sorted so files are processed in a reproducible order.
    all_files = sorted(os.listdir(config['audio']))
    for file in all_files:
        path = os.path.join(config['audio'], file)
        # Sub-directories cannot be decoded as audio; skip them.
        if not os.path.isfile(path):
            print(f"skipping {file}, not a regular file")
            continue
        print(f"running {file}")
        saved = clip_loud_segments(path, config)
        if saved is not None:
            print(f"Saved {saved} clips from {file}")