From 47f976aeea86c7f87b08d7dc3ec004b1513dc15b Mon Sep 17 00:00:00 2001
From: recipes-bot <noreply@deepgram.com>
Date: Mon, 11 May 2026 17:19:52 +0000
Subject: [PATCH] =?UTF-8?q?feat(python):=20voice-agents=20v1=20=E2=80=94?=
 =?UTF-8?q?=20latency-profiling?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../v1/latency-profiling/README.md            | 57 +++++++++++++++++++
 .../v1/latency-profiling/example.py           | 50 ++++++++++++++++
 .../v1/latency-profiling/example_test.py      | 16 ++++++
 3 files changed, 123 insertions(+)
 create mode 100644 recipes/python/voice-agents/v1/latency-profiling/README.md
 create mode 100644 recipes/python/voice-agents/v1/latency-profiling/example.py
 create mode 100644 recipes/python/voice-agents/v1/latency-profiling/example_test.py

diff --git a/recipes/python/voice-agents/v1/latency-profiling/README.md b/recipes/python/voice-agents/v1/latency-profiling/README.md
new file mode 100644
index 00000000..e1f39700
--- /dev/null
+++ b/recipes/python/voice-agents/v1/latency-profiling/README.md
@@ -0,0 +1,57 @@
+# Latency Profiling (Voice Agents v1)
+
+Measure time-to-first-byte (TTFB) for each stage of the voice agent pipeline — think (LLM) and speak (TTS) — to identify bottlenecks and optimize end-to-end response time.
+
+## What it does
+
+Opens a voice agent WebSocket session and injects a text message. As the agent processes the turn through its listen → think → speak pipeline, the example timestamps each stage event (`AgentThinking`, first audio byte, `AgentStartedSpeaking`, `AgentAudioDone`) and prints a per-component latency breakdown.
+
+This profiling pattern helps developers:
+- Identify which pipeline stage contributes the most latency
+- Compare latency across different model configurations (swap `gpt-4o-mini` for another LLM, or change the TTS voice)
+- Establish baseline metrics for production voice agent deployments
+
+## Key parameters
+
+| Parameter | Value | Description |
+|-----------|-------|-------------|
+| `listen.provider.model` | `"nova-3"` | STT model for the listen stage |
+| `think.provider.model` | `"gpt-4o-mini"` | LLM model for the think stage |
+| `think.prompt` | `"Reply in one short sentence."` | Short prompt to keep responses fast |
+| `speak.provider.model` | `"aura-2-thalia-en"` | TTS model for the speak stage |
+
+## Measured events
+
+| Metric | What it measures |
+|--------|-----------------|
+| **Think TTFB** | Time from user message injection to `AgentThinking` event |
+| **TTS TTFB** | Time from `AgentThinking` to first audio byte received |
+| **Speak duration** | Time from `AgentStartedSpeaking` to `AgentAudioDone` |
+| **Total turn** | End-to-end time from injection to audio completion |
+
+## Example output
+
+```
+Think TTFB:         312 ms
+TTS TTFB:           245 ms
+Speak duration:     987 ms
+Total turn:        1544 ms
+```
+
+## Prerequisites
+
+- Python 3.10+
+- Set `DEEPGRAM_API_KEY` environment variable
+- Install (from the repository root): `pip install -r recipes/python/requirements.txt`
+
+## Run
+
+```bash
+python example.py
+```
+
+## Test
+
+```bash
+pytest example_test.py -v
+```
diff --git a/recipes/python/voice-agents/v1/latency-profiling/example.py b/recipes/python/voice-agents/v1/latency-profiling/example.py
new file mode 100644
index 00000000..c39e9d29
--- /dev/null
+++ b/recipes/python/voice-agents/v1/latency-profiling/example.py
@@ -0,0 +1,50 @@
+"""Latency Profiling — measure per-component TTFB in a voice agent turn."""
+import threading, time
+from deepgram import DeepgramClient
+from deepgram.agent.v1.types import (
+    AgentV1InjectUserMessage, AgentV1Settings, AgentV1SettingsAgent,
+    AgentV1SettingsAgentListen, AgentV1SettingsAgentListenProvider_V1,
+    AgentV1SettingsAudio, AgentV1SettingsAudioInput)
+from deepgram.core.events import EventType
+from deepgram.types.speak_settings_v1 import SpeakSettingsV1
+from deepgram.types.speak_settings_v1provider import SpeakSettingsV1Provider_Deepgram
+from deepgram.types.think_settings_v1 import ThinkSettingsV1
+from deepgram.types.think_settings_v1provider import ThinkSettingsV1Provider_OpenAi
+
+client = DeepgramClient()  # no key passed here; README says to set DEEPGRAM_API_KEY
+ts, done = {}, threading.Event()  # ts: stage name -> perf_counter() stamp; done: set after the report
+with client.agent.v1.connect() as agent:
+    settings = AgentV1Settings(
+        audio=AgentV1SettingsAudio(
+            input=AgentV1SettingsAudioInput(encoding="linear16", sample_rate=24000)),
+        agent=AgentV1SettingsAgent(
+            listen=AgentV1SettingsAgentListen(provider=AgentV1SettingsAgentListenProvider_V1(
+                type="deepgram", model="nova-3")),
+            think=ThinkSettingsV1(provider=ThinkSettingsV1Provider_OpenAi(
+                type="open_ai", model="gpt-4o-mini"), prompt="Reply in one short sentence."),
+            speak=SpeakSettingsV1(provider=SpeakSettingsV1Provider_Deepgram(
+                type="deepgram", model="aura-2-thalia-en"))))
+    def on_message(msg):  # stamp each pipeline event; print the breakdown on AgentAudioDone
+        now = time.perf_counter()
+        if isinstance(msg, bytes):
+            ts.setdefault("tts_fb", now); return  # only the first binary frame's time is kept
+        t = getattr(msg, "type", "")
+        if t == "SettingsApplied":
+            ts["inject"] = now  # turn clock starts just before the text message is sent
+            agent.send_inject_user_message(AgentV1InjectUserMessage(
+                content="What is the deepest ocean trench?"))
+        elif t == "AgentThinking":
+            ts["think"] = now
+        elif t == "AgentStartedSpeaking":
+            ts["speak"] = now
+        elif t == "AgentAudioDone":  # report; stamps never recorded fall back to `now`
+            i, th, sp = ts.get("inject", now), ts.get("think", now), ts.get("speak", now)
+            print(f"Think TTFB:     {(th - i)*1000:7.0f} ms")
+            print(f"TTS TTFB:       {(ts.get('tts_fb', now) - th)*1000:7.0f} ms")
+            print(f"Speak duration: {(now - sp)*1000:7.0f} ms")
+            print(f"Total turn:     {(now - i)*1000:7.0f} ms")
+            done.set()  # unblock the main thread waiting below
+    agent.on(EventType.MESSAGE, on_message)
+    threading.Thread(target=agent.start_listening, daemon=True).start()  # listener runs in a daemon thread
+    agent.send_settings(settings)
+    if not done.wait(timeout=30): raise SystemExit("Timed out waiting for AgentAudioDone")  # exit non-zero
diff --git a/recipes/python/voice-agents/v1/latency-profiling/example_test.py b/recipes/python/voice-agents/v1/latency-profiling/example_test.py
new file mode 100644
index 00000000..ff1876f6
--- /dev/null
+++ b/recipes/python/voice-agents/v1/latency-profiling/example_test.py
@@ -0,0 +1,16 @@
+import subprocess
+from pathlib import Path
+
+def test_example_runs():
+    """Runs the latency profiling example and verifies it produces output."""
+    import sys  # local import: launch the example with the interpreter running pytest
+    example = Path(__file__).parent / "example.py"
+    result = subprocess.run(
+        [sys.executable, str(example)],
+        capture_output=True,
+        text=True,
+        timeout=60,
+    )
+    details = f"STDOUT: {result.stdout}\nSTDERR: {result.stderr}"
+    assert result.returncode == 0, f"Example failed\n{details}"
+    assert result.stdout.strip(), "Example produced no output"