conservationtechlab · kgarwoodsdzwa · Jun 6, 2025 · Jun 6, 2025 · Aug 20, 2025 · Aug 25, 2025
diff --git a/cfgs/extract_noise_example.yaml b/cfgs/extract_noise_example.yaml
@@ -0,0 +1,10 @@
+# path to audio file directory
+audio: /path/to/audio/file/dir/
+# folder for resulting clipped segments
+out: /path/to/output/folder/
+# samples of audio per frame to analyze
+frame_length: 4096
+# samples between the starts of successive frames; should not be larger than frame_length
+hop_length: 2048
+# how many seconds the result clip will be
+num_sec_slice: 3
diff --git a/tools/README.md b/tools/README.md
@@ -0,0 +1,19 @@
+Tools for handling unlabeled raw audio.
+
+Use these tools to investigate and better understand your raw
+audio data, and to isolate potentially significant acoustic
+events so that less time is spent labeling.
+
+run_extract_noise.py will generate short clips (3 s in the example
+config, set by num_sec_slice) from larger wav files, centered where
+the RMS exceeded 1.5x the average RMS of the entire file. This can
+highlight loud events in an audio file.
+
+extract_noise.py contains the functions used in run_extract_noise.py.
+These include clip_loud_segments, which stores clips of the
+configured length around loud runs of frames, and find_peaks, which
+flags the frames whose RMS exceeds 1.5x the average RMS of the file.
+
+display_rms_and_mel.py plots the RMS curve above a log-frequency
+spectrogram for a given wav, as a sanity check and to get a
+better idea of what the spectrogram looks like at a given
+RMS peak.
diff --git a/tools/display_rms_and_mel.py b/tools/display_rms_and_mel.py
@@ -0,0 +1,54 @@
+"""Display RMS and Mel-Spectrogram
+
+For a given audio file, you can visualize the RMS and
+the associated log-frequency spectrogram on a shared time
+axis to see how they relate. Replace the FILENAME constant
+with the path to your specific audio file.
+
+Usage:
+    python3 display_rms_and_mel.py
+"""
+from pathlib import Path
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+# Path of the audio file to inspect; edit before running.
+FILENAME = '<path/to/audio/file.wav>'
+# Analysis window size and step between windows, both in samples.
+FRAME_LENGTH = 2048
+HOP_LENGTH = 512
+NUM_SECONDS_OF_SLICE = 3
+# When true, the figure is also written to "<audio stem>_RMS_plot.png"
+# in the current working directory.
+SAVE_PLOT = True
+
+# sr=None asks librosa to keep the file's native sample rate, so every
+# later time/frequency conversion must be given this `sr` explicitly.
+sound, sr = librosa.load(FILENAME, sr=None)
+
+# Frame-wise RMS of the waveform, used to report the loudest frame.
+clip_rms = librosa.feature.rms(y=sound,
+                               frame_length=FRAME_LENGTH,
+                               hop_length=HOP_LENGTH)
+
+clip_rms = clip_rms.squeeze()
+peak_rms_index = clip_rms.argmax()
+print(f"Peak RMS index: {peak_rms_index}")
+# Convert the loudest frame number into a sample position.
+peak_index = peak_rms_index * HOP_LENGTH + int(FRAME_LENGTH/2)
+print(f"Peak index: {peak_index}")
+
+# Use the same window and hop for the plotted spectrogram and RMS curve
+# as for the peak search above, so both describe the same frames.
+S, _ = librosa.magphase(librosa.stft(sound,
+                                     n_fft=FRAME_LENGTH,
+                                     hop_length=HOP_LENGTH))
+rms = librosa.feature.rms(S=S,
+                          frame_length=FRAME_LENGTH,
+                          hop_length=HOP_LENGTH)
+fig, ax = plt.subplots(nrows=2, sharex=True)
+# Pass the real sample rate and hop; otherwise librosa falls back to its
+# default rate and the time axis is wrong for files at any other rate.
+times = librosa.times_like(rms, sr=sr, hop_length=HOP_LENGTH)
+ax[0].semilogy(times, rms[0], label='RMS Energy')
+ax[0].set(xticks=[])
+ax[0].legend()
+ax[0].label_outer()
+librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
+                         sr=sr, hop_length=HOP_LENGTH,
+                         y_axis='log', x_axis='time', ax=ax[1])
+ax[1].set(title='log Power spectrogram')
+
+if SAVE_PLOT:
+    name = Path(FILENAME).stem
+    plot_name = name + "_RMS_plot.png"
+    plt.savefig(plot_name)
+    print(f"Saved figure to {plot_name}")
+
+plt.show()
diff --git a/tools/extract_noise.py b/tools/extract_noise.py
@@ -0,0 +1,123 @@
+"""Extract noisy segments from a wav file.
+
+Takes in a wav file and a config mapping (frame/hop sizes,
+clip length and output folder) and stores clips, 3 seconds in
+the example config, centered on the stretches whose RMS
+exceeds 1.5x the average RMS for that wav file.
+"""
+import os
+import librosa
+import librosa.display
+import numpy as np
+import soundfile as sf
+import audioread
+
+
+def clip_loud_segments(file, config):  # pylint: disable=too-many-locals
+    """Extract loud segments from a wav file.
+
+    Frames are flagged by find_peaks when their RMS is above 1.5x the
+    average RMS of the whole file. For every consecutive run of
+    flagged frames, a clip of ``num_sec_slice`` seconds centered on
+    the middle of the run is written to the output folder. A run whose
+    clip would overlap the previously written clip is skipped.
+
+    Args:
+        file (str): The path of the current wav file.
+        config (dict): Settings read from the yaml config. Keys used:
+            'frame_length' and 'hop_length' (int, in samples),
+            'num_sec_slice' (clip length in seconds) and 'out'
+            (folder that receives the clips; created if missing).
+
+    Returns:
+        int: Number of clips generated.
+        None: Only if the audio file was unreadable, i.e. loading it
+            raised audioread.exceptions.NoBackendError. The error is
+            caught here and a message is printed instead.
+    """
+    frame_length = config['frame_length']
+    hop_length = config['hop_length']
+    num_sec_slice = config['num_sec_slice']
+    try:
+        sound, sr = librosa.load(file, sr=None)
+    except audioread.exceptions.NoBackendError:
+        print(f"skipping {file}, corrupt? Or wrong format.")
+        return None
+    print(f"sample rate: {sr}")
+
+    above_avg_rms = find_peaks(frame_length, hop_length, sound)
+
+    # splitext drops only the extension. str.strip('.wav') would remove
+    # any leading/trailing '.', 'w', 'a' or 'v' characters and mangle
+    # names such as "wave_01.wav".
+    stem = os.path.splitext(os.path.basename(file))[0]
+    os.makedirs(config['out'], exist_ok=True)
+    half_slice_width = int(num_sec_slice * sr / 2)
+
+    yes_counter = 0
+    start_index = None
+    # -1 (not 0) so that a clip starting at sample 0 is still accepted.
+    last_right_index = -1
+    number_clips_saved = 0
+    # The appended 0 closes a run that is still open when the file ends,
+    # so trailing runs go through the same code path as all others.
+    for index, value in enumerate(np.append(above_avg_rms, 0)):
+        if value == 1:
+            if yes_counter == 0:
+                start_index = index
+            yes_counter += 1
+            continue
+        if yes_counter == 0:
+            continue
+
+        # The run [start_index, index) just ended. Always reset the
+        # counter, even when the clip is skipped below, so the next run
+        # gets its own start index instead of merging with this one.
+        yes_counter = 0
+        mid_index = start_index + (index - start_index) // 2
+        # Frame number -> sample position.
+        # NOTE(review): librosa.feature.rms centers frames by default,
+        # so the half-frame offset may be unnecessary -- confirm.
+        real_index = mid_index * hop_length + frame_length // 2
+        left_index = max(0, real_index - half_slice_width)
+        if left_index <= last_right_index:
+            # Would overlap the previously written clip.
+            continue
+        right_index = real_index + half_slice_width
+        # left index needs to be greater than the last right
+        last_right_index = right_index + 1
+        name = os.path.join(config['out'], f"{stem}_{index}.wav")
+        # Slicing clamps right_index to the end of the audio.
+        sf.write(name, sound[left_index:right_index], sr)
+        print(f"created {name}, setting yes_counter back to 0")
+        number_clips_saved += 1
+
+    return number_clips_saved
+
+
+def find_peaks(frame_length, hop_length, sound):
+    """Flag the frames of a sound whose RMS is well above average.
+
+    The RMS is computed per frame, and a frame is flagged when its RMS
+    is greater than 1.5x the mean RMS over all frames.
+
+    Args:
+        frame_length (int): Window size in samples.
+        hop_length (int): Number of samples between the starts of
+            successive frames.
+        sound (numpy.ndarray): The audio as a time series array.
+
+    Returns:
+        numpy.ndarray: One entry per frame, 1 where the frame's RMS
+                       is above 1.5x the average RMS and 0 elsewhere,
+                       stored with the same dtype as the RMS values.
+    """
+    clip_rms = librosa.feature.rms(y=sound,
+                                   frame_length=frame_length,
+                                   hop_length=hop_length)
+
+    # atleast_1d keeps a single-frame result iterable; squeeze() alone
+    # would collapse it to a 0-d array.
+    clip_rms = np.atleast_1d(clip_rms.squeeze())
+    threshold = np.mean(clip_rms) * (3/2)
+
+    # Build a new array rather than overwriting the RMS values in place.
+    # Strict '>' so that a completely silent file (threshold 0) has no
+    # frame flagged.
+    above_avg_rms = (clip_rms > threshold).astype(clip_rms.dtype)
+
+    num_frames = np.sum(above_avg_rms)
+    print(f"num frames with above the 1.5x average rms value: {num_frames}")
+
+    return above_avg_rms
diff --git a/tools/run_extract_noise.py b/tools/run_extract_noise.py
@@ -0,0 +1,38 @@
+"""Create segments of noisy audio from wavs.
+
+This script uses the clip_loud_segments function to
+calculate the average RMS of each wav file and then
+creates clips (3 seconds in the example config) where
+the RMS exceeded 1.5x that average. The script walks
+through a directory and sends each file through the
+function. cfgs/extract_noise_example.yaml is an example
+of the required config file; copy it and fill it out
+before running the script.
+
+Usage:
+
+    python3 run_extract_noise.py
+    -config /path/to/extract_noise_copy.yaml
+
+"""
+import argparse
+import os
+import yaml
+from extract_noise import clip_loud_segments
+
+
+# Entry point: read the yaml config, then run clip_loud_segments on every
+# regular file found directly inside the configured audio directory.
+if __name__ == "__main__":
+    PARSER = argparse.ArgumentParser(
+        description='Path to config file.'
+    )
+    # required=True gives a clear usage error when the flag is missing,
+    # instead of failing later on open(None).
+    PARSER.add_argument('-config', type=str, required=True,
+                        help='Path to config.')
+    ARGS = PARSER.parse_args()
+    with open(ARGS.config, 'r', encoding='UTF-8') as f:
+        config = yaml.safe_load(f)
+    # sorted() makes the processing order reproducible; os.listdir
+    # returns entries in arbitrary order.
+    for file in sorted(os.listdir(config['audio'])):
+        path = os.path.join(config['audio'], file)
+        if not os.path.isfile(path):
+            # Sub-directories cannot be loaded as audio.
+            print(f"skipping {file}, not a regular file")
+            continue
+        print(f"running {file}")
+        saved = clip_loud_segments(path, config)
+        # None means the file could not be read as audio.
+        if saved is not None:
+            print(f"Saved {saved} clips from {file}")