From a11c08faa4500b02e72e8b039d6acf67b8585a34 Mon Sep 17 00:00:00 2001
From: Your Name
Date: Fri, 20 Feb 2026 23:52:28 -0800
Subject: [PATCH] Fix #2787: skip validate_frame_rate for raw PCM bytes in
 load_bytes

`validate_frame_rate` calls `pydub.AudioSegment.from_file` which takes
~200 ms. When the caller passes raw PCM bytes (no container header) the
call always fails and the exception is silently swallowed, wasting
200 ms on every invocation.

Add a lightweight `_is_audio_container` helper that inspects the first
few magic bytes of the input. `load_bytes` now only calls
`validate_frame_rate` when the payload starts with a recognised
container signature (RIFF/WAV, MP3, OGG, FLAC, MP4, AIFF, Matroska/WebM,
AMR, AU, CAF). Raw PCM buffers that match none of them skip the
validation entirely, eliminating the latency hit.

Also remove a stray `import pdb` debug statement.
---
Reviewer note: the helper hunk was amended after export (signature table
with additional container formats); the post-image blob id on the
"index" line below was not recomputed.

 funasr/utils/load_utils.py | 53 ++++++++++++++++++++++++++++++++++----
 1 file changed, 48 insertions(+), 5 deletions(-)

diff --git a/funasr/utils/load_utils.py b/funasr/utils/load_utils.py
index d208f7dc0..ba6ef22c3 100644
--- a/funasr/utils/load_utils.py
+++ b/funasr/utils/load_utils.py
@@ -15,7 +15,6 @@
     from funasr.download.file import download_from_url
 except:
     print("urllib is not installed, if you infer from url, please install it first.")
-import pdb
 import subprocess
 from subprocess import CalledProcessError, run
 
@@ -144,11 +143,55 @@ def load_audio_text_image_video(
     return data_or_path_or_list
 
 
+# (offset, magic) pairs identifying container formats by their leading bytes.
+_CONTAINER_SIGNATURES = (
+    (0, b"RIFF"),  # RIFF family (WAV, AVI, ...); the form type is not checked
+    (0, b"ID3"),  # MP3 carrying an ID3v2 tag
+    (0, b"OggS"),  # OGG
+    (0, b"fLaC"),  # FLAC
+    (4, b"ftyp"),  # MP4 / M4A: 'ftyp' box type follows the 4-byte box size
+    (0, b"FORM"),  # IFF FORM container (AIFF / AIFF-C)
+    (0, b"\x1a\x45\xdf\xa3"),  # Matroska / WebM (EBML header)
+    (0, b"#!AMR"),  # AMR-NB / AMR-WB
+    (0, b".snd"),  # Sun/NeXT AU
+    (0, b"caff"),  # Apple Core Audio Format
+)
+
+
+def _is_audio_container(data: bytes) -> bool:
+    """Return True if *data* starts with a recognised container-format magic header.
+
+    Raw PCM byte streams have no header, so they will normally return False
+    and the expensive pydub/ffmpeg validation round-trip can be skipped.
+
+    The check is deliberately permissive: a false positive only costs the
+    validation attempt that used to be made unconditionally, whereas a
+    false negative lets container bytes be parsed as raw int16 samples.
+    """
+    if len(data) < 4:
+        return False
+    # MPEG audio frame sync: 0xFF followed by a byte whose top three bits are
+    # set.  Raw PCM that happens to start with e.g. FF FF (the int16 sample
+    # -1) matches as well, which is harmless (see docstring).
+    if data[0] == 0xFF and (data[1] & 0xE0) == 0xE0:
+        return True
+    # A slice past the end of a short buffer is shorter and compares unequal.
+    return any(
+        data[offset : offset + len(magic)] == magic
+        for offset, magic in _CONTAINER_SIGNATURES
+    )
+
+
 def load_bytes(input):
-    try:
-        input = validate_frame_rate(input)
-    except:
-        pass
+    # Only run the (expensive) frame-rate validation when the payload is an
+    # actual audio container (WAV, MP3, OGG, …). Raw PCM buffers have no
+    # recognisable header and would cause pydub to spend ~200 ms before
+    # raising an exception that is then silently swallowed anyway.
+    if _is_audio_container(input):
+        try:
+            input = validate_frame_rate(input)
+        except:
+            pass
     middle_data = np.frombuffer(input, dtype=np.int16)
     middle_data = np.asarray(middle_data)
     if middle_data.dtype.kind not in "iu":