-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaudio_processor.py
More file actions
175 lines (139 loc) · 5.56 KB
/
audio_processor.py
File metadata and controls
175 lines (139 loc) · 5.56 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
"""
Audio processing module for AudioSaR
Handles pitch shifting, reverb, and normalization effects for lo-fi remixes
"""
import os
from pydub import AudioSegment
from pydub.effects import normalize
from pedalboard import Pedalboard, Reverb
from pedalboard.io import AudioFile
import numpy as np
def get_audio_duration(file_path):
    """
    Report how long an audio file plays, in seconds

    Args:
        file_path: Path to audio file

    Returns:
        float: Duration in seconds, or None if the file could not be read
    """
    try:
        # len() of the loaded segment is its length in milliseconds
        duration_ms = len(AudioSegment.from_file(file_path))
    except Exception as exc:
        # Any load failure is reported on stdout and signalled with None
        print(f"Error getting duration: {exc}")
        return None
    return duration_ms / 1000.0
def validate_audio_file(file_path):
    """
    Decide whether a file exists and can be decoded as non-empty audio

    Args:
        file_path: Path to audio file

    Returns:
        tuple: (is_valid: bool, message: str)
    """
    outcome = (False, "File does not exist")
    if os.path.exists(file_path):
        try:
            segment = AudioSegment.from_file(file_path)
            if len(segment) == 0:
                outcome = (False, "Audio file is empty")
            else:
                outcome = (True, "Valid audio file")
        except Exception as exc:
            # Decoding problems are turned into a readable failure message
            outcome = (False, f"Cannot load audio file: {str(exc)}")
    return outcome
def process_audio(input_path, output_path, pitch_shift=0, reverb_room_size=0.5,
                  reverb_wet_level=0.3, normalize_audio=True, target_db=-14.0):
    """
    Process audio file with pitch shift, reverb, and normalization

    The steps run in a fixed order: pitch shift, reverb, normalization, export.
    Every failure is reported through the return value; no exception escapes.

    Args:
        input_path: Path to input audio file
        output_path: Path to save processed audio. The extension selects the
            export format (.mp3, .wav, .m4a, .aac); anything else is written
            as mp3.
        pitch_shift: Pitch shift in semitones (negative = lower pitch). The
            shift is done by re-labelling the frame rate and resampling, so
            playback speed changes together with pitch.
        reverb_room_size: Reverb room size (0.0 to 1.0)
        reverb_wet_level: Reverb wet/dry mix (0.0 to 1.0). Reverb is skipped
            unless both reverb values are greater than 0.
        normalize_audio: Whether to normalize volume
        target_db: Target volume in dBFS (only used if normalize_audio=True)

    Returns:
        tuple: (success: bool, message: str)
    """
    # Path of the scratch WAV handed to pedalboard. It stays None until the
    # reverb step actually creates the file, so cleanup never touches a file
    # this call did not write.
    temp_path = None
    try:
        # Validate input file
        is_valid, validation_msg = validate_audio_file(input_path)
        if not is_valid:
            return False, f"Validation failed: {validation_msg}"

        # Load audio file
        audio = AudioSegment.from_file(input_path)

        # Step 1: Apply pitch shift
        if pitch_shift != 0:
            original_rate = audio.frame_rate
            # Negative semitones = lower pitch = slower playback
            new_sample_rate = int(original_rate * (2.0 ** (pitch_shift / 12.0)))
            # Re-label the same samples with the new rate (no resampling yet)
            pitched_audio = audio._spawn(audio.raw_data, overrides={
                "frame_rate": new_sample_rate
            })
            # Resample back to the original frame rate
            audio = pitched_audio.set_frame_rate(original_rate)

        # Step 2: Apply reverb using pedalboard
        if reverb_room_size > 0 and reverb_wet_level > 0:
            # Export to temporary WAV for pedalboard processing
            temp_path = output_path + ".temp.wav"
            audio.export(temp_path, format="wav")

            with AudioFile(temp_path) as f:
                audio_array = f.read(f.frames)
                sample_rate = f.samplerate

            board = Pedalboard([
                Reverb(room_size=reverb_room_size, wet_level=reverb_wet_level)
            ])
            effected = board(audio_array, sample_rate)

            # Overwrite the scratch file with the effected signal; the first
            # axis of the array is used as the channel count
            with AudioFile(temp_path, 'w', sample_rate, effected.shape[0]) as f:
                f.write(effected)

            # Reload as AudioSegment
            audio = AudioSegment.from_wav(temp_path)

        # Step 3: Normalize volume
        if normalize_audio:
            audio = normalize(audio, headroom=0.1)
            current_db = audio.dBFS
            # Silent audio reports -inf dBFS; the resulting infinite gain is
            # meaningless, so leave silence untouched.
            if current_db != float("-inf"):
                # NOTE(review): raising the level to target_db after peak
                # normalization may push peaks past 0 dBFS - confirm whether
                # clipping is acceptable here.
                audio = audio.apply_gain(target_db - current_db)

        # Determine output format from file extension (default: mp3)
        file_ext = os.path.splitext(output_path)[1].lower()
        format_map = {
            '.mp3': 'mp3',
            '.wav': 'wav',
            '.m4a': 'mp4',
            '.aac': 'adts',
        }
        output_format = format_map.get(file_ext, 'mp3')
        audio.export(output_path, format=output_format)

        return True, f"Successfully processed and saved to {os.path.basename(output_path)}"
    except FileNotFoundError:
        # The input was already confirmed to exist above, so this is reported
        # as a missing FFmpeg binary.
        # NOTE(review): a missing output directory presumably lands here too.
        return False, "FFmpeg not found. Please install FFmpeg to process audio files."
    except Exception as e:
        return False, f"Processing error: {str(e)}"
    finally:
        # Remove the scratch file on success AND on failure. Cleanup is
        # best-effort: a failed delete must not mask the real result.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
def get_processed_filename(original_filename, output_dir, avoid_overwrite=True):
    """
    Generate a filename for processed audio, avoiding overwrite if needed

    The result is "<name>_processed<ext>" inside output_dir. Only the final
    path component of original_filename is used, so the result stays inside
    output_dir even when a full path is passed in.

    Args:
        original_filename: Original file name (a path is reduced to its base name)
        output_dir: Output directory path
        avoid_overwrite: If True, append counter to avoid overwriting existing files

    Returns:
        str: Path to the output file inside output_dir
    """
    # os.path.join drops output_dir when a later component is absolute, so
    # strip any directory part before building the new name.
    name, ext = os.path.splitext(os.path.basename(original_filename))
    output_path = os.path.join(output_dir, f"{name}_processed{ext}")

    if avoid_overwrite:
        # Try _1, _2, ... until a name that is not yet on disk is found
        counter = 1
        while os.path.exists(output_path):
            output_path = os.path.join(output_dir, f"{name}_processed_{counter}{ext}")
            counter += 1

    return output_path