diff --git a/cfgs/extract_noise_example.yaml b/cfgs/extract_noise_example.yaml
new file mode 100644
index 0000000..f2bcfef
--- /dev/null
+++ b/cfgs/extract_noise_example.yaml
@@ -0,0 +1,10 @@
+# path to audio file directory
+audio: /path/to/audio/file/dir/
+# folder for resulting clipped segments
+out: /path/to/output/folder/
+# samples of audio per frame to analyze
+frame_length: 4096
+# overlap between frames, should not be larger than frame length
+hop_length: 2048
+# how many seconds the result clip will be
+num_sec_slice: 3
diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..d3de5ae
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,19 @@
+Tools for handling unlabeled raw audio.
+
+To investigate and understand your raw audio data better,
+and to be able to isolate potentially significant acoustic
+events to reduce time labeling.
+
+run_extract_noise.py will generate 3s clips from larger wav files
+where the RMS of that segment exceeded the average RMS of the
+entire clip. This can highlight loud events in an audio file.
+
+extract_noise.py contains the functions used in run_extract_noise.py.
+These functions include clip_loud_segments which stores clips
+at a desired length if they exceed the average RMS of the entire
+clip as determined by the find_peaks function.
+
+display_rms_and_mel.py will give a visual graph with the mel
+spectrogram and RMS chart for a given wav for a sanity check
+and to get a better idea of what the spectrogram looks like for
+a given RMS peak.
diff --git a/tools/display_rms_and_mel.py b/tools/display_rms_and_mel.py
new file mode 100644
index 0000000..db5f1bc
--- /dev/null
+++ b/tools/display_rms_and_mel.py
@@ -0,0 +1,54 @@
+"""Display RMS and Mel-Spectrogram
+
+For a given audio file, you can visualize the RMS and
+the associated Mel-Spectrogram with the same time-step to
+see how they relate. Replace the filename variable with the
+path to your specific audio file.
+
+Usage:
+    python3 display_rms_and_mel.py
+"""
+from pathlib import Path
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+import numpy as np
+
+
+FILENAME = ''
+FRAME_LENGTH = 2048
+HOP_LENGTH = 512
+NUM_SECONDS_OF_SLICE = 3
+SAVE_PLOT = True
+
+sound, sr = librosa.load(FILENAME, sr=None)
+
+clip_rms = librosa.feature.rms(y=sound,
+                               frame_length=FRAME_LENGTH,
+                               hop_length=HOP_LENGTH)
+
+clip_rms = clip_rms.squeeze()
+peak_rms_index = clip_rms.argmax()
+print(f"Peak RMS index: {peak_rms_index}")
+peak_index = peak_rms_index * HOP_LENGTH + int(FRAME_LENGTH/2)
+print(f"Peak index: {peak_index}")
+
+S, phase = librosa.magphase(librosa.stft(sound))
+rms = librosa.feature.rms(S=S)
+fig, ax = plt.subplots(nrows=2, sharex=True)
+times = librosa.times_like(rms)
+ax[0].semilogy(times, rms[0], label='RMS Energy')
+ax[0].set(xticks=[])
+ax[0].legend()
+ax[0].label_outer()
+librosa.display.specshow(librosa.amplitude_to_db(S, ref=np.max),
+                         y_axis='log', x_axis='time', ax=ax[1])
+ax[1].set(title='log Power spectrogram')
+
+if SAVE_PLOT is True:
+    name = Path(FILENAME).stem
+    plot_name = name + "_RMS_plot.png"
+    plt.savefig(plot_name)
+    print(f"Saved figure to {plot_name}")
+
+plt.show()
diff --git a/tools/extract_noise.py b/tools/extract_noise.py
new file mode 100644
index 0000000..dfcf76c
--- /dev/null
+++ b/tools/extract_noise.py
@@ -0,0 +1,118 @@
+"""Extract noisy segments from a wav file.
+
+Takes in a wav file and an outpath to store
+the 3 second segments that contain an RMS value above
+the average RMS for that wav file.
+"""
+import os
+import librosa
+import librosa.display
+import numpy as np
+import soundfile as sf
+import audioread
+
+
+def clip_loud_segments(file, config):  # pylint: disable=too-many-locals
+    """Extract loud segments from a wav file.
+
+    If a section of audio RMS is 1.5x above the average
+    RMS of the whole file, that section will be stored as
+    its own segment without overlapping.
+
+    Args:
+        file (str): The path of the current wav file.
+        config (dict): Parsed config with keys 'frame_length',
+            'hop_length', 'num_sec_slice' and 'out' (output dir).
+
+    Returns:
+        int: Number of clips generated.
+        None: Only if audio file was unreadable to exit loop.
+
+    Raises:
+        audioread.exceptions.NoBackendError: If audio file is
+            not readable.
+    """
+    index = None
+    frame_length = config['frame_length']
+    hop_length = config['hop_length']
+    num_sec_slice = config['num_sec_slice']
+    try:
+        sound, sr = librosa.load(file, sr=None)
+    except audioread.exceptions.NoBackendError:
+        print(f"skipping {file}, corrupt? Or wrong format.")
+        return None
+    print(f"sample rate: {sr}")
+
+    above_avg_rms = find_peaks(frame_length, hop_length, sound)
+
+    # Base name for the output clips; splitext (not str.strip, which
+    # removes *characters*) safely drops the .wav extension.
+    basename = os.path.splitext(os.path.basename(file))[0]
+    yes_counter = 0
+    start_index = None
+    last_right_index = 0
+    number_clips_saved = 0
+    for index, value in enumerate(above_avg_rms):
+        if value == 1:
+            if yes_counter == 0:
+                start_index = index
+            yes_counter += 1
+        else:
+            if yes_counter > 0:
+                # Center the clip on the middle frame of the loud run,
+                # then convert the frame index to a sample index.
+                mid_index = int((index - start_index) / 2) + start_index
+                real_index = mid_index * hop_length + int(frame_length/2)
+                half_slice_width = int(num_sec_slice * sr / 2)
+                left_index = max(0, real_index - half_slice_width)
+                # left index needs to be greater than the last right
+                if left_index > last_right_index:
+                    right_index = real_index + half_slice_width
+                    last_right_index = right_index + 1
+                    sound_slice = sound[left_index:right_index]
+                    name = config['out'] + basename + "_" + str(index) + ".wav"
+                    sf.write(name, sound_slice, sr)
+                    print(f"created {name}, setting yes_counter back to 0")
+                    number_clips_saved += 1
+                # Reset even when the clip overlapped the previous one, so
+                # a suppressed run does not merge with the next loud run.
+                yes_counter = 0
+
+    # Handle a loud run that extends to the end of the file.
+    if yes_counter > 0:
+        mid_index = int((index - start_index) / 2) + start_index
+        real_index = mid_index * hop_length + int(frame_length/2)
+        half_slice_width = int(num_sec_slice * sr / 2)
+        left_index = max(0, real_index - half_slice_width)
+        if left_index > last_right_index:
+            right_index = real_index + half_slice_width
+            sound_slice = sound[left_index:right_index]
+            name = config['out'] + basename + "_" + str(index) + ".wav"
+            sf.write(name, sound_slice, sr)
+            number_clips_saved += 1
+    return number_clips_saved
+
+
+def find_peaks(frame_length, hop_length, sound):
+    """Find peak RMS moments in a sound file.
+
+    Args:
+        frame_length (int): Window size.
+        hop_length (int): Overlap between frames.
+        sound (numpy.ndarray): The audio as a time series array.
+
+    Returns:
+        numpy.ndarray: The array containing each frame as an index
+                       with values corresponding to whether that
+                       frame exceeded the avg RMS or not.
+    """
+    clip_rms = librosa.feature.rms(y=sound,
+                                   frame_length=frame_length,
+                                   hop_length=hop_length)
+
+    clip_rms = clip_rms.squeeze()
+    average_rms = np.mean(clip_rms) * (3/2)
+    # Vectorized threshold: 1 where the frame RMS meets or exceeds
+    # 1.5x the mean RMS, 0 otherwise (avoids mutating clip_rms in place).
+    above_avg_rms = (clip_rms >= average_rms).astype(int)
+
+    num_frames = np.sum(above_avg_rms)
+    print(f"num frames above the 1.5x average rms value: {num_frames}")
+
+    return above_avg_rms
diff --git a/tools/run_extract_noise.py b/tools/run_extract_noise.py
new file mode 100644
index 0000000..27b7c93
--- /dev/null
+++ b/tools/run_extract_noise.py
@@ -0,0 +1,40 @@
+"""Create segments of noisy audio from wavs.
+
+This script uses the extract noise function to
+calculate the average RMS of a given wav file,
+and then creates 3 second segments where the
+RMS peaked above the average. This main script
+parses through a directory and sends each wav
+file through the function. The extract_noise_example.yaml
+is an example of the config file needed, copy
+it and fill it out prior to running script.
+
+Usage:
+
+    python3 run_extract_noise.py
+    -config /path/to/extract_noise_copy.yaml
+
+"""
+import argparse
+import os
+import yaml
+from extract_noise import clip_loud_segments
+
+
+if __name__ == "__main__":
+    PARSER = argparse.ArgumentParser(
+        description='Path to config file.'
+    )
+    PARSER.add_argument('-config', type=str,
+                        help='Path to config.')
+    ARGS = PARSER.parse_args()
+    with open(ARGS.config, 'r', encoding='UTF-8') as f:
+        config = yaml.safe_load(f)
+    all_files = os.listdir(config['audio'])
+    for file in all_files:
+        if not file.endswith('.wav'):  # skip configs, subfolders, etc.
+            continue
+        print(f"running {file}")
+        saved = clip_loud_segments(os.path.join(config['audio'], file), config)
+        if saved is not None:
+            print(f"Saved {saved} clips from {file}")