From 47f976aeea86c7f87b08d7dc3ec004b1513dc15b Mon Sep 17 00:00:00 2001 From: recipes-bot Date: Mon, 11 May 2026 17:19:52 +0000 Subject: [PATCH] =?UTF-8?q?feat(python):=20voice-agents=20v1=20=E2=80=94?= =?UTF-8?q?=20latency-profiling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../v1/latency-profiling/README.md | 57 +++++++++++++++++++ .../v1/latency-profiling/example.py | 50 ++++++++++++++++ .../v1/latency-profiling/example_test.py | 16 ++++++ 3 files changed, 123 insertions(+) create mode 100644 recipes/python/voice-agents/v1/latency-profiling/README.md create mode 100644 recipes/python/voice-agents/v1/latency-profiling/example.py create mode 100644 recipes/python/voice-agents/v1/latency-profiling/example_test.py diff --git a/recipes/python/voice-agents/v1/latency-profiling/README.md b/recipes/python/voice-agents/v1/latency-profiling/README.md new file mode 100644 index 00000000..e1f39700 --- /dev/null +++ b/recipes/python/voice-agents/v1/latency-profiling/README.md @@ -0,0 +1,57 @@ +# Latency Profiling (Voice Agents v1) + +Measure time-to-first-byte (TTFB) for each stage of the voice agent pipeline — think (LLM) and speak (TTS) — to identify bottlenecks and optimize end-to-end response time. + +## What it does + +Opens a voice agent WebSocket session and injects a text message. As the agent processes the turn through its listen → think → speak pipeline, the example timestamps each stage event (`AgentThinking`, first audio byte, `AgentStartedSpeaking`, `AgentAudioDone`) and prints a per-component latency breakdown. 
+ +This profiling pattern helps developers: +- Identify which pipeline stage contributes the most latency +- Compare latency across different model configurations (swap `gpt-4o-mini` for another LLM, or change the TTS voice) +- Establish baseline metrics for production voice agent deployments + +## Key parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| `listen.provider.model` | `"nova-3"` | STT model for the listen stage | +| `think.provider.model` | `"gpt-4o-mini"` | LLM model for the think stage | +| `think.prompt` | `"Reply in one short sentence."` | Short prompt to keep responses fast | +| `speak.provider.model` | `"aura-2-thalia-en"` | TTS model for the speak stage | + +## Measured events + +| Metric | What it measures | +|--------|-----------------| +| **Think TTFB** | Time from user message injection to `AgentThinking` event | +| **TTS TTFB** | Time from `AgentThinking` to first audio byte received | +| **Speak duration** | Time from `AgentStartedSpeaking` to `AgentAudioDone` | +| **Total turn** | End-to-end time from injection to audio completion | + +## Example output + +``` +Think TTFB: 312 ms +TTS TTFB: 245 ms +Speak duration: 987 ms +Total turn: 1544 ms +``` + +## Prerequisites + +- Python 3.10+ +- Set `DEEPGRAM_API_KEY` environment variable +- Install: `pip install -r recipes/python/requirements.txt` + +## Run + +```bash +python example.py +``` + +## Test + +```bash +pytest example_test.py -v +``` diff --git a/recipes/python/voice-agents/v1/latency-profiling/example.py b/recipes/python/voice-agents/v1/latency-profiling/example.py new file mode 100644 index 00000000..c39e9d29 --- /dev/null +++ b/recipes/python/voice-agents/v1/latency-profiling/example.py @@ -0,0 +1,50 @@ +"""Latency Profiling — measure per-component TTFB in a voice agent turn.""" +import threading, time +from deepgram import DeepgramClient +from deepgram.agent.v1.types import ( + AgentV1InjectUserMessage, AgentV1Settings, AgentV1SettingsAgent, + 
+    AgentV1SettingsAgentListen, AgentV1SettingsAgentListenProvider_V1,
+    AgentV1SettingsAudio, AgentV1SettingsAudioInput)
+from deepgram.core.events import EventType
+from deepgram.types.speak_settings_v1 import SpeakSettingsV1
+from deepgram.types.speak_settings_v1provider import SpeakSettingsV1Provider_Deepgram
+from deepgram.types.think_settings_v1 import ThinkSettingsV1
+from deepgram.types.think_settings_v1provider import ThinkSettingsV1Provider_OpenAi
+
+client = DeepgramClient()
+ts, done = {}, threading.Event()  # ts: stage name -> perf_counter() stamp; done: set after the report
+with client.agent.v1.connect() as agent:
+    settings = AgentV1Settings(
+        audio=AgentV1SettingsAudio(
+            input=AgentV1SettingsAudioInput(encoding="linear16", sample_rate=24000)),
+        agent=AgentV1SettingsAgent(
+            listen=AgentV1SettingsAgentListen(provider=AgentV1SettingsAgentListenProvider_V1(
+                type="deepgram", model="nova-3")),
+            think=ThinkSettingsV1(provider=ThinkSettingsV1Provider_OpenAi(
+                type="open_ai", model="gpt-4o-mini"), prompt="Reply in one short sentence."),
+            speak=SpeakSettingsV1(provider=SpeakSettingsV1Provider_Deepgram(
+                type="deepgram", model="aura-2-thalia-en"))))
+    def on_message(msg):  # stamps each stage event; prints the breakdown on AgentAudioDone
+        now = time.perf_counter()
+        if isinstance(msg, bytes):
+            ts.setdefault("tts_fb", now); return  # setdefault keeps only the first audio chunk
+        t = getattr(msg, "type", "")
+        if t == "SettingsApplied":
+            ts["inject"] = now  # clock starts once settings are acknowledged, right before injecting
+            agent.send_inject_user_message(AgentV1InjectUserMessage(
+                content="What is the deepest ocean trench?"))
+        elif t == "AgentThinking":
+            ts["think"] = now
+        elif t == "AgentStartedSpeaking":
+            ts["speak"] = now
+        elif t == "AgentAudioDone":  # any stage that was never stamped defaults to `now`
+            i, th, sp = ts.get("inject", now), ts.get("think", now), ts.get("speak", now)
+            print(f"Think TTFB: {(th - i)*1000:7.0f} ms")
+            print(f"TTS TTFB: {(ts.get('tts_fb', now) - th)*1000:7.0f} ms")
+            print(f"Speak duration: {(now - sp)*1000:7.0f} ms")
+            print(f"Total turn: {(now - i)*1000:7.0f} ms")
+            done.set()
+    agent.on(EventType.MESSAGE, on_message)
+    threading.Thread(target=agent.start_listening, daemon=True).start()
+    agent.send_settings(settings)
+    if not done.wait(timeout=30): raise SystemExit("No AgentAudioDone within 30 s")
diff --git a/recipes/python/voice-agents/v1/latency-profiling/example_test.py b/recipes/python/voice-agents/v1/latency-profiling/example_test.py
new file mode 100644
index 00000000..ff1876f6
--- /dev/null
+++ b/recipes/python/voice-agents/v1/latency-profiling/example_test.py
@@ -0,0 +1,16 @@
+import subprocess
+import sys
+from pathlib import Path
+
+
+def test_example_runs():
+    """Runs the latency profiling example and verifies it produces output."""
+    example = Path(__file__).parent / "example.py"
+    result = subprocess.run(
+        [sys.executable, str(example)],  # same interpreter (and venv) that is running pytest
+        capture_output=True, text=True, timeout=60,
+    )
+    assert result.returncode == 0, (
+        f"Example failed\nSTDOUT: {result.stdout}\nSTDERR: {result.stderr}"
+    )
+    assert result.stdout.strip(), "Example produced no output"