diff --git a/pyproject.toml b/pyproject.toml index ec59536f..82962a17 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,27 +19,19 @@ readme = "README.md" requires-python = ">=3.11,<3.14" license = "MIT" dependencies = [ + # --- Core (framework-agnostic) --- "pydantic>=2.0", "pydantic-settings>=2.0", - "pipecat-ai==0.0.104", "elevenlabs>=1.0.0", "openai>=1.0.0", "anthropic>=0.83.0", "litellm>=1.30.0", - "deepgram-sdk>=3.5.0,<4.0.0", - "onnxruntime>=1.16.0", "aioboto3>=12.0.0", "google-generativeai>=0.3.0", "google-genai>=0.3.0", - "google-cloud-speech>=2.0.0", - "google-cloud-texttospeech>=2.0.0", - "azure-cognitiveservices-speech>=1.31.0", - "cartesia>=1.0.0", - "assemblyai>=0.17.0", "setuptools>=65.0.0", "fastapi>=0.100.0", "uvicorn>=0.23.0", - "websockets>=12.0", "httpx>=0.25.0", "pyyaml>=6.0", "pandas>=2.0", @@ -54,10 +46,27 @@ dependencies = [ "jaconv>=0.3.0", "regex>=2023.0.0", "more-itertools>=10.0.0", - "nvidia-riva-client>=2.25.0,<3.0.0" ] [project.optional-dependencies] +# Voice framework: choose one (pipecat or roomkit) +pipecat = [ + "pipecat-ai==0.0.104", + "deepgram-sdk>=3.5.0,<4.0.0", + "onnxruntime>=1.16.0", + "google-cloud-speech>=2.0.0", + "google-cloud-texttospeech>=2.0.0", + "azure-cognitiveservices-speech>=1.31.0", + "cartesia>=1.0.0", + "assemblyai>=0.17.0", + "websockets>=12.0,<16.0", + "nvidia-riva-client>=2.25.0,<3.0.0", +] +roomkit = [ + "roomkit>=0.7.0a2; python_version>='3.12'", + "deepgram-sdk>=6.0.0", + "websockets>=16.0", +] dev = [ "pytest>=7.0", "pytest-asyncio>=0.21", @@ -76,6 +85,11 @@ apps = [ [project.scripts] eva = "eva.cli:main" +[tool.uv] +# pipecat and roomkit have incompatible dependency versions +# (deepgram-sdk v3 vs v6, websockets <16 vs >=16) +conflicts = [[{extra = "pipecat"}, {extra = "roomkit"}]] + [tool.hatch.build.targets.wheel] packages = ["src/eva"] diff --git a/src/eva/assistant/__init__.py b/src/eva/assistant/__init__.py index 5cd2037b..3d6aad98 100644 --- a/src/eva/assistant/__init__.py +++ 
b/src/eva/assistant/__init__.py @@ -1 +1,37 @@ """Assistant components for the voice agent benchmark framework.""" + +import importlib + +from eva.assistant.base import AssistantServerBase + +# Registry of available assistant frameworks. +# Each entry maps a framework name to the fully-qualified class path. +# Lazy imports prevent loading framework dependencies (e.g. Pipecat) +# when a different framework is selected. +_REGISTRY: dict[str, str] = { + "pipecat": "eva.assistant.server.PipecatAssistantServer", + "roomkit": "eva.assistant.roomkit_server.RoomKitAssistantServer", +} + + +def create_assistant_server(framework: str, **kwargs) -> AssistantServerBase: + """Create an assistant server for the given framework. + + Args: + framework: Framework identifier (e.g. ``"pipecat"``, ``"roomkit"``). + **kwargs: Arguments forwarded to the server constructor. + + Returns: + An ``AssistantServerBase`` implementation. + + Raises: + ValueError: If the framework is not registered. + """ + if framework not in _REGISTRY: + available = ", ".join(sorted(_REGISTRY)) + raise ValueError(f"Unknown assistant framework '{framework}'. Available: {available}") + + module_path, class_name = _REGISTRY[framework].rsplit(".", 1) + module = importlib.import_module(module_path) + cls = getattr(module, class_name) + return cls(**kwargs) diff --git a/src/eva/assistant/base.py b/src/eva/assistant/base.py new file mode 100644 index 00000000..c01608c6 --- /dev/null +++ b/src/eva/assistant/base.py @@ -0,0 +1,91 @@ +"""Abstract base class for assistant servers in the EVA benchmark framework. + +Any voice framework (Pipecat, RoomKit, etc.) must implement this interface +so that the orchestrator can run conversations framework-agnostically. 
+""" + +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any + +from eva.models.agents import AgentConfig +from eva.models.config import AudioLLMConfig, PipelineConfig, SpeechToSpeechConfig + + +class AssistantServerBase(ABC): + """Contract between ConversationWorker and any voice framework. + + Implementations must: + - Listen for WebSocket connections on the assigned port using the + Twilio-style JSON protocol (events: connected, start, media, stop) + with 8 kHz mu-law audio encoding. + - Write all required output files to ``output_dir`` when ``stop()`` + is called. + - Expose conversation statistics via ``get_conversation_stats()``. + """ + + @abstractmethod + def __init__( + self, + current_date_time: str, + pipeline_config: PipelineConfig | SpeechToSpeechConfig | AudioLLMConfig, + agent: AgentConfig, + agent_config_path: str, + scenario_db_path: str, + output_dir: Path, + port: int, + conversation_id: str, + ) -> None: + """Initialize the assistant server. + + Args: + current_date_time: Current date/time string from the evaluation record. + pipeline_config: Model pipeline configuration (STT+LLM+TTS, S2S, or Audio-LLM). + agent: Agent configuration loaded from YAML. + agent_config_path: Path to the agent YAML configuration file. + scenario_db_path: Path to the scenario database JSON file. + output_dir: Directory where output files must be written. + port: WebSocket port to listen on. + conversation_id: Unique identifier for this conversation. + """ + ... + + @abstractmethod + async def start(self) -> None: + """Start the WebSocket server. + + Must block until the server is accepting connections on the assigned port. + """ + ... + + @abstractmethod + async def stop(self) -> None: + """Stop the server and write all output files to ``output_dir``. 
+ + Required output files: + - ``audit_log.json`` — structured conversation log + - ``transcript.jsonl`` — ``{timestamp, role, content}`` per line + - ``initial_scenario_db.json`` — scenario DB snapshot before conversation + - ``final_scenario_db.json`` — scenario DB snapshot after conversation + - ``audio_mixed.wav`` — mixed user+assistant audio (16-bit PCM) + - ``audio_user.wav`` — user-only audio track + - ``audio_assistant.wav`` — assistant-only audio track + - ``framework_logs.jsonl`` — JSONL with event types: + ``tts_text``, ``llm_response``, ``turn_start``, ``turn_end`` + - ``response_latencies.json`` — ``{latencies, mean, max, count}`` + + Optional (framework-specific): + - ``pipecat_metrics.jsonl`` or equivalent latency metrics + """ + ... + + @abstractmethod + def get_conversation_stats(self) -> dict[str, Any]: + """Return conversation statistics. + + Must include at minimum: + - ``num_turns``: number of user turns + - ``num_tool_calls``: total tool invocations + - ``tools_called``: list of tool names invoked + """ + ... diff --git a/src/eva/assistant/pipeline/observers.py b/src/eva/assistant/pipeline/observers.py index df1a50d5..09b17ebe 100644 --- a/src/eva/assistant/pipeline/observers.py +++ b/src/eva/assistant/pipeline/observers.py @@ -74,7 +74,7 @@ def __init__(self, output_path: str, conversation_id: str, clock: WallClock, tur self.output_path = Path(output_path) self.conversation_id = conversation_id self.clock = clock - self.log_file = self.output_path / "pipecat_logs.jsonl" + self.log_file = self.output_path / "framework_logs.jsonl" # Ensure output directory exists self.output_path.mkdir(parents=True, exist_ok=True) diff --git a/src/eva/assistant/roomkit_server.py b/src/eva/assistant/roomkit_server.py new file mode 100644 index 00000000..f6ad74b2 --- /dev/null +++ b/src/eva/assistant/roomkit_server.py @@ -0,0 +1,532 @@ +"""RoomKit-based assistant server for voice conversations. 
+ +Uses RoomKit's VoiceChannel for the voice pipeline (STT, TTS, VAD, +turn detection, audio recording) while reusing EVA's AgenticSystem, +AuditLog, and ToolExecutor for LLM reasoning and tool execution. +""" + +import asyncio +import json +import os +import shutil +import time +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import uvicorn +from fastapi import FastAPI, WebSocket, WebSocketDisconnect +from roomkit import HookResult, HookTrigger, RoomKit, VoiceChannel +from roomkit.voice.backends.twilio_ws import TwilioWebSocketBackend +from roomkit.voice.pipeline import ( + AudioPipelineConfig, + RecordingChannelMode, + RecordingConfig, + WavFileRecorder, +) + +from eva.assistant.agentic.audit_log import AuditLog +from eva.assistant.agentic.system import AgenticSystem +from eva.assistant.base import AssistantServerBase +from eva.assistant.services.llm import LiteLLMClient +from eva.assistant.tools.tool_executor import ToolExecutor +from eva.models.agents import AgentConfig +from eva.models.config import AudioLLMConfig, PipelineConfig, SpeechToSpeechConfig +from eva.utils.logging import get_logger + +logger = get_logger(__name__) + +SAMPLE_RATE = 24000 +INITIAL_MESSAGE = "Hello! How can I help you today?" + + +# --------------------------------------------------------------------------- +# STT/TTS provider factories +# --------------------------------------------------------------------------- + +_STT_PROVIDERS: dict[str, str] = { + "deepgram": "roomkit.voice.stt.deepgram", + "openai_whisper": "roomkit.voice.stt.openai", +} + +_TTS_PROVIDERS: dict[str, str] = { + "elevenlabs": "roomkit.voice.tts.elevenlabs", +} + + +def _create_stt_provider(config: PipelineConfig) -> Any: + """Create a RoomKit STT provider from EVA's pipeline config.""" + name = config.stt or "deepgram" + + if name not in _STT_PROVIDERS: + raise ValueError(f"Unsupported STT for RoomKit: {name}. 
Available: {sorted(_STT_PROVIDERS)}") + + if name == "deepgram": + from roomkit.voice.stt.deepgram import DeepgramConfig, DeepgramSTTProvider + + api_key = os.environ.get("DEEPGRAM_API_KEY", "") + return DeepgramSTTProvider( + config=DeepgramConfig( + api_key=api_key, + model="nova-2", + language="en", + punctuate=True, + smart_format=True, + endpointing=300, + ) + ) + + raise ValueError(f"STT provider '{name}' registered but not implemented") + + +def _create_tts_provider(config: PipelineConfig) -> Any: + """Create a RoomKit TTS provider from EVA's pipeline config.""" + name = config.tts or "elevenlabs" + + if name not in _TTS_PROVIDERS: + raise ValueError(f"Unsupported TTS for RoomKit: {name}. Available: {sorted(_TTS_PROVIDERS)}") + + if name == "elevenlabs": + from roomkit.voice.tts.elevenlabs import ElevenLabsConfig, ElevenLabsTTSProvider + + api_key = os.environ.get("ELEVENLABS_API_KEY", "") + return ElevenLabsTTSProvider( + config=ElevenLabsConfig( + api_key=api_key, + voice_id="21m00Tcm4TlvDq8ikWAM", + model_id="eleven_multilingual_v2", + output_format=f"pcm_{SAMPLE_RATE}", + optimize_streaming_latency=3, + ) + ) + + raise ValueError(f"TTS provider '{name}' registered but not implemented") + + +# --------------------------------------------------------------------------- +# Main server +# --------------------------------------------------------------------------- + + +class RoomKitAssistantServer(AssistantServerBase): + """RoomKit-based implementation of the assistant server. + + Uses RoomKit's VoiceChannel for the voice pipeline (STT, TTS, VAD, + turn detection, audio recording) and EVA's AgenticSystem for LLM + reasoning and tools. Currently supports PipelineConfig (STT+LLM+TTS) + mode only. 
+ """ + + def __init__( + self, + current_date_time: str, + pipeline_config: PipelineConfig | SpeechToSpeechConfig | AudioLLMConfig, + agent: AgentConfig, + agent_config_path: str, + scenario_db_path: str, + output_dir: Path, + port: int, + conversation_id: str, + ) -> None: + if not isinstance(pipeline_config, PipelineConfig): + raise NotImplementedError( + "RoomKit server currently supports PipelineConfig (STT+LLM+TTS) only. " + "SpeechToSpeech and AudioLLM modes are not yet implemented." + ) + + self.pipeline_config = pipeline_config + self.agent = agent + self.output_dir = Path(output_dir) + self.port = port + self.conversation_id = conversation_id + self.current_date_time = current_date_time + + # --- EVA components (Pipecat-free, reusable) --- + self.audit_log = AuditLog() + self.tool_handler = ToolExecutor( + tool_config_path=agent_config_path, + scenario_db_path=scenario_db_path, + tool_module_path=agent.tool_module_path, + current_date_time=current_date_time, + ) + llm_client = LiteLLMClient(model=pipeline_config.llm) + self.agentic_system = AgenticSystem( + current_date_time=current_date_time, + agent=agent, + tool_handler=self.tool_handler, + audit_log=self.audit_log, + llm_client=llm_client, + output_dir=output_dir, + ) + + # --- RoomKit components --- + self.kit = RoomKit() + self.backend = TwilioWebSocketBackend(output_sample_rate=SAMPLE_RATE) + stt = _create_stt_provider(pipeline_config) + tts = _create_tts_provider(pipeline_config) + + # Audio recording via RoomKit's WavFileRecorder + self._recorder = WavFileRecorder() + self._recording_config = RecordingConfig( + channels=RecordingChannelMode.ALL, + storage=str(self.output_dir), + ) + + pipeline = AudioPipelineConfig( + recorder=self._recorder, + recording_config=self._recording_config, + ) + + self.voice = VoiceChannel( + "voice", + stt=stt, + tts=tts, + backend=self.backend, + pipeline=pipeline, + enable_barge_in=False, + ) + self.kit.register_channel(self.voice) + + # --- Server state --- + 
self._app: FastAPI | None = None + self._server: uvicorn.Server | None = None + self._server_task: asyncio.Task | None = None + self._running = False + self._session = None + self._current_query_task: asyncio.Task | None = None + + # --- Framework logging --- + self._framework_logs: list[dict] = [] + self._latency_measurements: list[float] = [] + self._was_interrupted = False + self._user_speech_end_ts: float | None = None + + # ----------------------------------------------------------------------- + # AssistantServerBase interface + # ----------------------------------------------------------------------- + + async def start(self) -> None: + if self._running: + return + + self.output_dir.mkdir(parents=True, exist_ok=True) + self._register_hooks() + + self._app = FastAPI() + + @self._app.websocket("/ws") + async def websocket_endpoint(websocket: WebSocket): + await websocket.accept() + await self._handle_session(websocket) + + @self._app.websocket("/") + async def websocket_root(websocket: WebSocket): + await websocket.accept() + await self._handle_session(websocket) + + config = uvicorn.Config( + self._app, host="0.0.0.0", port=self.port, log_level="warning", lifespan="off", + ) + self._server = uvicorn.Server(config) + self._running = True + self._server_task = asyncio.create_task(self._server.serve()) + + while not self._server.started: + await asyncio.sleep(0.01) + + logger.info(f"RoomKit assistant server started on ws://localhost:{self.port}") + + async def stop(self) -> None: + if not self._running: + return + self._running = False + + if self._current_query_task and not self._current_query_task.done(): + self._current_query_task.cancel() + + try: + await self.kit.close() + except Exception as e: + logger.warning(f"Error closing RoomKit: {e}") + + if self._server: + self._server.should_exit = True + if self._server_task: + try: + await asyncio.wait_for(self._server_task, timeout=5.0) + except (asyncio.TimeoutError, asyncio.CancelledError): + if 
self._server_task: + self._server_task.cancel() + try: + await self._server_task + except asyncio.CancelledError: + pass + self._server = None + self._server_task = None + + await self._save_outputs() + logger.info(f"RoomKit assistant server stopped on port {self.port}") + + def get_conversation_stats(self) -> dict[str, Any]: + return self.audit_log.get_stats() + + # ----------------------------------------------------------------------- + # RoomKit hooks + # ----------------------------------------------------------------------- + + def _register_hooks(self) -> None: + @self.kit.hook(HookTrigger.ON_SPEECH_START) + async def on_speech_start(event, ctx): + self._was_interrupted = False + self._log_event("turn_start", {"frame": ""}) + return HookResult.allow() + + @self.kit.hook(HookTrigger.ON_SPEECH_END) + async def on_speech_end(event, ctx): + self._user_speech_end_ts = time.time() + self._log_event("turn_end", {"frame": "", "was_interrupted": self._was_interrupted}) + return HookResult.allow() + + @self.kit.hook(HookTrigger.ON_TRANSCRIPTION) + async def on_transcription(event, ctx): + text = event.text if hasattr(event, "text") else str(event) + if not text or not text.strip(): + return HookResult.allow() + + logger.info(f"User said: {text}") + self._save_transcript_entry("user", text) + + # Cancel any in-progress query before starting a new one + if self._current_query_task and not self._current_query_task.done(): + self._current_query_task.cancel() + try: + await self._current_query_task + except asyncio.CancelledError: + pass + + # AgenticSystem handles audit_log internally + self._current_query_task = asyncio.create_task(self._process_query(text)) + return HookResult.allow() + + @self.kit.hook(HookTrigger.BEFORE_TTS) + async def before_tts(text, ctx): + if self._user_speech_end_ts: + latency = time.time() - self._user_speech_end_ts + self._latency_measurements.append(latency) + self._user_speech_end_ts = None + self._log_event("tts_text", {"frame": text}) + 
return HookResult.allow() + + @self.kit.hook(HookTrigger.ON_BARGE_IN) + async def on_barge_in(event, ctx): + self._was_interrupted = True + if self._current_query_task and not self._current_query_task.done(): + self._current_query_task.cancel() + return HookResult.allow() + + # ----------------------------------------------------------------------- + # Session handling + # ----------------------------------------------------------------------- + + async def _handle_session(self, websocket: WebSocket) -> None: + logger.info("Client connected to RoomKit assistant server") + try: + room = await self.kit.create_room(room_id=self.conversation_id) + await self.kit.attach_channel(room.id, "voice") + session = await self.backend.connect(room.id, "user", "voice") + self._session = session + self.backend.bind_websocket(websocket) + await self.kit.join(room.id, "voice", session=session) + self.backend.notify_session_ready(session) + + # Recording starts automatically via pipeline.on_session_started() + + # Send greeting concurrently with WebSocket loop. The backend's + # write queue ensures send_json() never blocks receive_json(). 
+ async def _greet(): + await self.voice.say(session, INITIAL_MESSAGE) + self._save_transcript_entry("assistant", INITIAL_MESSAGE) + self._log_event("tts_text", {"frame": INITIAL_MESSAGE}) + + greeting_task = asyncio.create_task(_greet()) + await self._websocket_loop(websocket, session) + if not greeting_task.done(): + greeting_task.cancel() + + except WebSocketDisconnect: + logger.info("Client disconnected") + except Exception as e: + logger.error(f"Session error: {e}", exc_info=True) + finally: + # Stop recording — pipeline manages the handle + self._recorder.close() + + if self._session: + try: + await self.backend.disconnect(self._session) + except Exception as e: + logger.warning(f"Error disconnecting session: {e}") + + if self.agentic_system: + try: + self.agentic_system.save_agent_perf_stats() + except Exception as e: + logger.error(f"Error saving agent perf stats: {e}") + + self._save_latencies() + logger.info("Session ended") + + async def _websocket_loop(self, websocket: WebSocket, session) -> None: + """Read Twilio-protocol messages and feed audio to RoomKit. + + Uses a dedicated reader task + queue so that inbound reads never + compete with outbound sends (greeting TTS / assistant responses) + for event loop time. Without this, outbound send_json() calls + starve the receive side, causing inbound frames to buffer in TCP + and arrive as bursts — producing choppy audio in recordings. 
+ """ + audio_queue: asyncio.Queue = asyncio.Queue() + + async def _reader(): + """Dedicated WebSocket reader — never yields to send operations.""" + try: + while self._running: + try: + data = await websocket.receive_json() + except WebSocketDisconnect: + break + event_type = data.get("event") + if event_type == "media": + payload = data.get("media", {}).get("payload", "") + if payload: + await audio_queue.put(payload) + elif event_type == "stop": + logger.info("Twilio stream stopped") + break + finally: + await audio_queue.put(None) # sentinel + + reader_task = asyncio.create_task(_reader()) + try: + while True: + payload = await audio_queue.get() + if payload is None: + break + await self.backend.feed_twilio_audio(session, payload) + finally: + if not reader_task.done(): + reader_task.cancel() + + # ----------------------------------------------------------------------- + # AI processing + # ----------------------------------------------------------------------- + + async def _process_query(self, text: str) -> None: + """Run AgenticSystem and speak the response via RoomKit TTS.""" + if not self._session: + return + try: + async for response in self.agentic_system.process_query(text): + if response: + self._log_event("llm_response", {"frame": response}) + self._save_transcript_entry("assistant", response) + await self.voice.say(self._session, response) + except asyncio.CancelledError: + logger.info("Query cancelled (user interrupted)") + except Exception as e: + logger.error(f"Error processing query: {e}", exc_info=True) + + # ----------------------------------------------------------------------- + # Logging and output + # ----------------------------------------------------------------------- + + def _log_event(self, event_type: str, data: dict) -> None: + self._framework_logs.append({ + "type": event_type, + "data": data, + "timestamp": int(time.time() * 1000), + "conversation_id": self.conversation_id, + }) + + def _save_transcript_entry(self, role: str, 
content: str) -> None: + entry = { + "timestamp": datetime.now(timezone.utc).isoformat(), + "role": role, + "content": content, + } + try: + with open(self.output_dir / "transcript.jsonl", "a", encoding="utf-8") as f: + f.write(json.dumps(entry, ensure_ascii=False) + "\n") + except Exception as e: + logger.error(f"Error saving transcript: {e}") + + def _save_latencies(self) -> None: + latencies = self._latency_measurements + try: + with open(self.output_dir / "response_latencies.json", "w") as f: + json.dump({ + "latencies": latencies, + "mean": sum(latencies) / len(latencies) if latencies else 0.0, + "max": max(latencies) if latencies else 0.0, + "count": len(latencies), + }, f, indent=2) + except Exception as e: + logger.error(f"Error saving latencies: {e}") + + async def _save_outputs(self) -> None: + # Audit log + self.audit_log.save(self.output_dir / "audit_log.json") + + # Transcript fallback + transcript_path = self.output_dir / "transcript.jsonl" + if not transcript_path.exists(): + self.audit_log.save_transcript_jsonl(transcript_path) + + # Scenario DB snapshots (REQUIRED) + try: + with open(self.output_dir / "initial_scenario_db.json", "w") as f: + json.dump(self.tool_handler.original_db, f, indent=2, sort_keys=True, default=str) + with open(self.output_dir / "final_scenario_db.json", "w") as f: + json.dump(self.tool_handler.db, f, indent=2, sort_keys=True, default=str) + except Exception as e: + logger.error(f"Error saving scenario DBs: {e}", exc_info=True) + raise + + # Audio: copy RoomKit's WAV recordings to EVA's expected filenames + self._copy_recordings() + + # Framework logs + with open(self.output_dir / "framework_logs.jsonl", "w") as f: + for entry in self._framework_logs: + f.write(json.dumps(entry) + "\n") + + # ElevenLabs events stub (required by metrics processor) + self._write_elevenlabs_stub() + + logger.info(f"Outputs saved to {self.output_dir}") + + def _copy_recordings(self) -> None: + """Rename RoomKit's WAV recordings to EVA's expected 
filenames.""" + # ALL mode produces: *_inbound.wav, *_outbound.wav, *_mixed.wav + name_map = {"inbound": "audio_user.wav", "outbound": "audio_assistant.wav", "mixed": "audio_mixed.wav"} + found = 0 + for wav_file in self.output_dir.glob("*.wav"): + for suffix, eva_name in name_map.items(): + if wav_file.name.endswith(f"_{suffix}.wav"): + shutil.move(str(wav_file), self.output_dir / eva_name) + logger.info(f"Audio: {wav_file.name} -> {eva_name}") + found += 1 + break + if found == 0: + logger.warning("No recording WAV files found in output directory") + + def _write_elevenlabs_stub(self) -> None: + """Write a minimal elevenlabs_events.jsonl for metrics compatibility.""" + stub_path = self.output_dir / "elevenlabs_events.jsonl" + with open(stub_path, "w") as f: + f.write(json.dumps({ + "type": "connection_state", + "timestamp": int(time.time() * 1000), + "sequence": 1, + "data": {"state": "ended", "details": {"reason": "goodbye"}}, + }) + "\n") diff --git a/src/eva/assistant/server.py b/src/eva/assistant/server.py index 57a0fc2e..6882d782 100644 --- a/src/eva/assistant/server.py +++ b/src/eva/assistant/server.py @@ -1,4 +1,4 @@ -"""Assistant server - Pipecat-based WebSocket server for voice conversations. +"""Pipecat-based assistant server for voice conversations. This module provides the Pipecat pipeline server that the user simulator connects to. It handles audio streaming via WebSocket with Twilio-style frame serialization. @@ -46,6 +46,7 @@ from pipecat.utils.time import time_now_iso8601 from eva.assistant.agentic.audit_log import AuditLog, current_timestamp_ms +from eva.assistant.base import AssistantServerBase from eva.assistant.pipeline.agent_processor import BenchmarkAgentProcessor, UserAudioCollector, UserObserver from eva.assistant.pipeline.audio_llm_processor import ( AudioLLMProcessor, @@ -80,8 +81,8 @@ INITIAL_MESSAGE = "Hello! How can I help you today?" -class AssistantServer: - """Pipecat-based WebSocket server for the assistant in voice conversations. 
+class PipecatAssistantServer(AssistantServerBase): + """Pipecat-based implementation of the assistant server. This server: - Accepts WebSocket connections from the user simulator @@ -879,3 +880,7 @@ async def override__maybe_trigger_user_turn_stopped(self): # For non-finalized, only trigger if timeout task has completed if self._timeout_task is None: await self.trigger_user_turn_stopped() + + +# Backward-compatible alias +AssistantServer = PipecatAssistantServer diff --git a/src/eva/assistant/tools/tool_executor.py b/src/eva/assistant/tools/tool_executor.py index 9722e514..a683d829 100644 --- a/src/eva/assistant/tools/tool_executor.py +++ b/src/eva/assistant/tools/tool_executor.py @@ -7,7 +7,6 @@ from typing import Callable import yaml -from pipecat.services.llm_service import FunctionCallParams from eva.utils.logging import get_logger @@ -124,11 +123,6 @@ def _load_tool_module(self) -> dict[str, Callable]: logger.error(f"Failed to load tool module {self.tool_module_path}: {e}") raise - async def execute_realtime_tool(self, params: FunctionCallParams): - logger.info(f"Executing realtime tool: {params.function_name}, params {params.arguments}") - result = await self.execute(params.function_name, params.arguments) - await params.result_callback(result) - async def execute(self, tool_name: str, params: dict) -> dict: """Execute a tool call and return the response. 
diff --git a/src/eva/metrics/processor.py b/src/eva/metrics/processor.py index ad4d5d6c..5fd2c622 100644 --- a/src/eva/metrics/processor.py +++ b/src/eva/metrics/processor.py @@ -10,7 +10,7 @@ from eva.models.results import ConversationResult from eva.utils.log_processing import ( AnnotationLabel, - aggregate_pipecat_logs_by_type, + aggregate_framework_logs_by_type, align_turn_keys, annotate_last_entry, append_turn_text, @@ -240,7 +240,7 @@ def _handle_audit_log_event( conversation_trace.append(get_entry_for_audit_log(event, state.turn_num)) -def _handle_pipecat_event( +def _handle_framework_event( event: dict, state: "_TurnExtractionState", context: "_ProcessorContext", @@ -752,30 +752,30 @@ def _load_audit_log_transcript(output_dir: Path) -> list[dict]: return history @staticmethod - def _load_pipecat_logs(pipecat_logs_path: str) -> list[dict]: - """Load and normalize pipecat log entries into history format.""" + def _load_framework_logs(framework_logs_path: str) -> list[dict]: + """Load and normalize framework log entries into history format.""" history = [] - raw_pipecat = [] - with open(pipecat_logs_path) as f: + raw_logs = [] + with open(framework_logs_path) as f: for line in f: - raw_pipecat.append(json.loads(line)) + raw_logs.append(json.loads(line)) allowed_types = {"turn_start", "turn_end", "tts_text", "llm_response"} - raw_pipecat = [entry for entry in raw_pipecat if entry.get("type") in allowed_types] + raw_logs = [entry for entry in raw_logs if entry.get("type") in allowed_types] # Some audio-native models emit llm_response (full text with spaces); some emits tts_text (per-token chunks). 
- has_tts_text = any(entry.get("type") == "tts_text" for entry in raw_pipecat) + has_tts_text = any(entry.get("type") == "tts_text" for entry in raw_logs) if has_tts_text: - raw_pipecat = [entry for entry in raw_pipecat if entry.get("type") != "llm_response"] + raw_logs = [entry for entry in raw_logs if entry.get("type") != "llm_response"] - grouped_pipecat = aggregate_pipecat_logs_by_type(raw_pipecat) - for entry in grouped_pipecat: + grouped_logs = aggregate_framework_logs_by_type(raw_logs) + for entry in grouped_logs: if (ts := entry.get("start_timestamp")) is None: continue history.append( { "timestamp_ms": int(ts), - "source": "pipecat", + "source": "framework", "event_type": entry.get("type", "unknown"), "data": entry.get("data", {}), } @@ -813,12 +813,12 @@ def _build_history( output_dir: Path, result: ConversationResult, ) -> None: - """Merge audit log, pipecat, and ElevenLabs logs into a timestamp-sorted context.history. + """Merge audit log, framework, and ElevenLabs logs into a timestamp-sorted context.history. Each entry: {timestamp_ms, source, event_type, data}. 
""" history = self._load_audit_log_transcript(output_dir) - history.extend(self._load_pipecat_logs(result.pipecat_logs_path)) + history.extend(self._load_framework_logs(result.framework_logs_path)) history.extend(self._load_elevenlabs_logs(result.elevenlabs_logs_path)) history.sort(key=lambda e: e["timestamp_ms"]) @@ -854,8 +854,8 @@ def _extract_turns_from_history(context: _ProcessorContext) -> None: for event in context.history: if event["source"] == "audit_log": _handle_audit_log_event(event, state, context, conversation_trace, context.is_audio_native) - elif event["source"] == "pipecat": - _handle_pipecat_event(event, state, context, conversation_trace) + elif event["source"] == "framework": + _handle_framework_event(event, state, context, conversation_trace) elif event["source"] == "elevenlabs": if _handle_elevenlabs_event(event, state, context, conversation_trace, context.is_audio_native): continue diff --git a/src/eva/models/config.py b/src/eva/models/config.py index cd8fe819..6aab97b5 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -281,6 +281,15 @@ class ModelDeployment(DeploymentTypedDict): description="Pipeline (STT + LLM + TTS), speech-to-speech, or audio-LLM model configuration", ) + # Assistant framework + framework: Literal["pipecat", "roomkit"] = Field( + "pipecat", + description=( + "Voice framework for the assistant server. " + "Set via EVA_FRAMEWORK=pipecat (default) or EVA_FRAMEWORK=roomkit." 
+ ), + ) + # Run identifier run_id: str = Field( default_factory=current_date_and_time, diff --git a/src/eva/models/results.py b/src/eva/models/results.py index a10985f8..dae8b78b 100644 --- a/src/eva/models/results.py +++ b/src/eva/models/results.py @@ -4,7 +4,7 @@ from datetime import datetime from typing import Any, Optional -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, model_validator class ErrorDetails(BaseModel): @@ -63,9 +63,17 @@ class ConversationResult(BaseModel): transcript_path: Optional[str] = Field(None, description="Path to transcript JSONL file") audit_log_path: Optional[str] = Field(None, description="Path to audit log JSON file") conversation_log_path: Optional[str] = Field(None, description="Path to conversation log file") - pipecat_logs_path: Optional[str] = Field(None, description="Path to pipecat logs JSONL file") + framework_logs_path: Optional[str] = Field(None, description="Path to framework logs JSONL file") elevenlabs_logs_path: Optional[str] = Field(None, description="Path to elevenlabs logs JSONL file") + @model_validator(mode="before") + @classmethod + def _migrate_pipecat_logs_path(cls, data: Any) -> Any: + """Accept legacy ``pipecat_logs_path`` field from old result.json files.""" + if isinstance(data, dict) and "pipecat_logs_path" in data and "framework_logs_path" not in data: + data["framework_logs_path"] = data.pop("pipecat_logs_path") + return data + # Summary stats (pre-metrics) num_turns: int = Field(0, description="Number of conversation turns") num_tool_calls: int = Field(0, description="Number of tool calls made") diff --git a/src/eva/orchestrator/worker.py b/src/eva/orchestrator/worker.py index bf549d6b..2c34e4fd 100644 --- a/src/eva/orchestrator/worker.py +++ b/src/eva/orchestrator/worker.py @@ -7,7 +7,7 @@ from pathlib import Path from typing import Any, Optional -from eva.assistant.server import AssistantServer +from eva.assistant import create_assistant_server from eva.models.agents 
import AgentConfig from eva.models.config import RunConfig from eva.models.record import EvaluationRecord @@ -223,7 +223,7 @@ async def run(self) -> ConversationResult: transcript_path=str(self.output_dir / "transcript.jsonl"), audit_log_path=str(self.output_dir / "audit_log.json"), conversation_log_path=str(self.output_dir / "logs.log"), - pipecat_logs_path=str(self.output_dir / "pipecat_logs.jsonl"), + framework_logs_path=str(self.output_dir / "framework_logs.jsonl"), elevenlabs_logs_path=str(self.output_dir / "elevenlabs_events.jsonl"), num_turns=self._conversation_stats.get("num_turns", 0), num_tool_calls=self._conversation_stats.get("num_tool_calls", 0), @@ -235,7 +235,8 @@ async def run(self) -> ConversationResult: async def _start_assistant(self) -> None: """Start the assistant server.""" - self._assistant_server = AssistantServer( + self._assistant_server = create_assistant_server( + framework=self.config.framework, current_date_time=self.record.current_date_time, pipeline_config=self.config.model, agent=self.agent, diff --git a/src/eva/utils/conversation_checks.py b/src/eva/utils/conversation_checks.py index d284549e..8b670a0e 100644 --- a/src/eva/utils/conversation_checks.py +++ b/src/eva/utils/conversation_checks.py @@ -60,13 +60,13 @@ def check_conversation_finished(output_dir: Path) -> bool: def find_records_with_llm_generic_error(output_dir: Path, record_ids: set[str] | list[str]) -> list[str]: - """Find records that have the LLM generic error message in pipecat_logs.jsonl.""" + """Find records that have the LLM generic error message in framework_logs.jsonl.""" affected = [] for record_id in record_ids: - pipecat_logs_path = output_dir / "records" / record_id / "pipecat_logs.jsonl" - if not pipecat_logs_path.exists(): + framework_logs_path = output_dir / "records" / record_id / "framework_logs.jsonl" + if not framework_logs_path.exists(): continue - with open(pipecat_logs_path) as f: + with open(framework_logs_path) as f: for line in f: line = 
line.strip() if not line: diff --git a/src/eva/utils/log_processing.py b/src/eva/utils/log_processing.py index bc960151..0dd3730c 100644 --- a/src/eva/utils/log_processing.py +++ b/src/eva/utils/log_processing.py @@ -178,8 +178,8 @@ def align_turn_keys( audio_timestamps.setdefault(key, timestamps_default) -def aggregate_pipecat_logs_by_type(pipecat_logs: list[dict]) -> list[dict]: - """Aggregate consecutive pipecat logs of the same type. +def aggregate_framework_logs_by_type(framework_logs: list[dict]) -> list[dict]: + """Aggregate consecutive framework logs of the same type. Only tts_text/llm_response entries can appear consecutively and need aggregation; turn_start/turn_end are single events that pass through unchanged. @@ -188,21 +188,21 @@ def aggregate_pipecat_logs_by_type(pipecat_logs: list[dict]) -> list[dict]: already contain proper spacing and are joined without a separator. Args: - pipecat_logs: Filtered pipecat logs (tts_text, llm_response, turn_start, turn_end). + framework_logs: Filtered framework logs (tts_text, llm_response, turn_start, turn_end). Returns: A list of aggregated log dictionaries. 
""" - if not pipecat_logs: + if not framework_logs: return [] aggregated: list[dict] = [] - current = pipecat_logs[0] + current = framework_logs[0] data_key = next(iter(current["data"])) text = current["data"][data_key] min_ts = max_ts = current.get("timestamp", 0) - for log in pipecat_logs[1:]: + for log in framework_logs[1:]: if log["type"] == current["type"]: # Consecutive text chunks — tts_text needs space, llm_response already has spacing sep = " " if current["type"] == "tts_text" else "" diff --git a/tests/artifacts/records/1.1.2/pipecat_logs.jsonl b/tests/artifacts/records/1.1.2/framework_logs.jsonl similarity index 100% rename from tests/artifacts/records/1.1.2/pipecat_logs.jsonl rename to tests/artifacts/records/1.1.2/framework_logs.jsonl diff --git a/tests/artifacts/records/1.1.2/result.json b/tests/artifacts/records/1.1.2/result.json index ff7b3b9c..f33c76eb 100644 --- a/tests/artifacts/records/1.1.2/result.json +++ b/tests/artifacts/records/1.1.2/result.json @@ -34,7 +34,7 @@ "transcript_path": "output/2026-03-01_22-22-03.547446/records/1.1.2/transcript.jsonl", "audit_log_path": "output/2026-03-01_22-22-03.547446/records/1.1.2/audit_log.json", "conversation_log_path": "output/2026-03-01_22-22-03.547446/records/1.1.2/logs.log", - "pipecat_logs_path": "output/2026-03-01_22-22-03.547446/records/1.1.2/pipecat_logs.jsonl", + "framework_logs_path": "output/2026-03-01_22-22-03.547446/records/1.1.2/framework_logs.jsonl", "elevenlabs_logs_path": "output/2026-03-01_22-22-03.547446/records/1.1.2/elevenlabs_events.jsonl", "num_turns": 14, "num_tool_calls": 5, diff --git a/tests/fixtures/processor_histories.json b/tests/fixtures/processor_histories.json index 10afc5cb..de8623af 100644 --- a/tests/fixtures/processor_histories.json +++ b/tests/fixtures/processor_histories.json @@ -3,24 +3,24 @@ "id": "normal_conversation", "description": "Normal 2-turn exchange: assistant greeting + 1 user/assistant round, no interruptions", "history": [ - {"timestamp_ms": 1000, 
"source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello how can I help you?"}}}, {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 2.0}}, - {"timestamp_ms": 2100, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 3000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 2100, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 3000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 3100, "source": "audit_log", "event_type": "user", "data": "I need to change my flight"}, {"timestamp_ms": 3200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.0}}, {"timestamp_ms": 3500, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I need to change my flight"}}}, {"timestamp_ms": 4000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.0}}, - {"timestamp_ms": 4500, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Sure let me help with that."}}, + {"timestamp_ms": 4500, "source": "framework", "event_type": "tts_text", "data": {"frame": "Sure let me help with that."}}, {"timestamp_ms": 4550, "source": "audit_log", "event_type": "assistant", "data": 
"Sure let me help with that."}, {"timestamp_ms": 4600, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 4.5}}, {"timestamp_ms": 5000, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Sure, let me help with that."}}}, {"timestamp_ms": 5500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 5.5}}, - {"timestamp_ms": 5600, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 6000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 5600, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 6000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 6100, "source": "audit_log", "event_type": "user", "data": "Thanks, goodbye"}, {"timestamp_ms": 6200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.0}}, {"timestamp_ms": 6500, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Thanks, goodbye"}}}, @@ -51,23 +51,23 @@ "id": "assistant_interrupts_user", "description": "Assistant starts speaking (audio_start) while user audio is still open \u2014 assistant interrupts user. 
Conversation ends after the interruption turn.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Welcome!"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Welcome!"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Welcome!"}}}, {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 2.0}}, - {"timestamp_ms": 2100, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 3000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 2100, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 3000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 3100, "source": "audit_log", "event_type": "user", "data": "I want to rebook"}, {"timestamp_ms": 3200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.0}}, {"timestamp_ms": 3500, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I want to rebook my flight"}}}, {"timestamp_ms": 3700, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 3.7}}, - {"timestamp_ms": 3800, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Let me look that up."}}, + {"timestamp_ms": 3800, "source": "framework", "event_type": "tts_text", "data": {"frame": "Let me look that up."}}, {"timestamp_ms": 3850, "source": "audit_log", "event_type": 
"assistant", "data": "Let me look that up."}, {"timestamp_ms": 3900, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Let me look that up."}}}, {"timestamp_ms": 4000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.0}}, {"timestamp_ms": 4500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 4.5}}, - {"timestamp_ms": 4600, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 4600, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 5000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} ], "expected": { @@ -93,12 +93,12 @@ "id": "empty_user_speech", "description": "Conversation with assistant greeting only, user disconnects before speaking \u2014 tests _reconcile early return", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello!"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Hello!"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello!"}}}, {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 2.0}}, - {"timestamp_ms": 2100, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 2100, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 3000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": 
{"state": "session_ended"}}} ], "expected": { @@ -122,25 +122,25 @@ "id": "conversation_with_tool_calls", "description": "Normal conversation with a tool call and tool response between assistant turns", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Welcome!"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Welcome!"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Welcome!"}}}, {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 2.0}}, - {"timestamp_ms": 2100, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 3000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 2100, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 3000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 3100, "source": "audit_log", "event_type": "user", "data": "Look up reservation ABC123"}, {"timestamp_ms": 3200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.0}}, {"timestamp_ms": 3500, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Look up reservation ABC123"}}}, {"timestamp_ms": 3800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.8}}, {"timestamp_ms": 4000, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": 
{"confirmation_number": "ABC123", "last_name": "Smith"}}}, {"timestamp_ms": 4100, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "success", "reservation": {"confirmation_number": "ABC123"}}}}, - {"timestamp_ms": 4500, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I found your reservation."}}, + {"timestamp_ms": 4500, "source": "framework", "event_type": "tts_text", "data": {"frame": "I found your reservation."}}, {"timestamp_ms": 4550, "source": "audit_log", "event_type": "assistant", "data": "I found your reservation."}, {"timestamp_ms": 4600, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 4.5}}, {"timestamp_ms": 5000, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I found your reservation."}}}, {"timestamp_ms": 5500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 5.5}}, - {"timestamp_ms": 5600, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 5600, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 6000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} ], "expected": { @@ -168,7 +168,7 @@ "id": "sample_1_1_4_turn_6_delayed_elevenlabs_interruption", "description": "Reproduces sample 1.1.4 turns 4-7: ElevenLabs delivers assistant_speech and user audio_start BEFORE the next pipecat turn_start. The assistant's audio_start arrives while the user's audio is still open \u2014 a real interruption where the assistant starts speaking before the user finishes. 
Uses actual conversation text from the airline scenario.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 1010, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 0.9}}, {"timestamp_ms": 1050, "source": "audit_log", "event_type": "user", "data": "No. That code is wrong. It is chaos. as in kilo. Oh, I Thank you. L. as in Lima. T as in tango. S as in Sierra. F as in foxtrot."}, {"timestamp_ms": 1100, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "No, that code is wrong. \n[slow] It is K as in Kilo, O as in Oscar, L as in Lima, T as in Tango, S as in Sierra, F as in Foxtrot."}}}, @@ -176,13 +176,13 @@ {"timestamp_ms": 1500, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "KOLTSF", "last_name": "Johansson"}}}, {"timestamp_ms": 1600, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "error", "message": "Last name does not match"}}}, {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 2.0}}, - {"timestamp_ms": 2100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Thanks. I tried Kilo India Lima Tango Sierra Foxtrot with Johansson, and I still cannot find a match. Could you please double-check the confirmation code?"}}, + {"timestamp_ms": 2100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Thanks. I tried Kilo India Lima Tango Sierra Foxtrot with Johansson, and I still cannot find a match. Could you please double-check the confirmation code?"}}, {"timestamp_ms": 2150, "source": "audit_log", "event_type": "assistant", "data": "Thanks. 
I tried Kilo India Lima Tango Sierra Foxtrot with Johansson, and I still cannot find a match. Could you please double-check the confirmation code?"}, {"timestamp_ms": 3500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 3.5}}, {"timestamp_ms": 3600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Tried Kilo India Lima Tango Sierra Foxtrot with Johansson and I still cannot find a match. Could you please double-check the confirmation code?"}}}, {"timestamp_ms": 3700, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.7}}, - {"timestamp_ms": 3800, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 4000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 3800, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 4000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 4050, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Yes, the last name is [slow] J O H A N S S O N. \nAnd the code is [slow] K as in Kilo, O as in Oscar, L as in Lima, T as in Tango, S as in Sierra, F as in Foxtrot."}}}, {"timestamp_ms": 4100, "source": "audit_log", "event_type": "user", "data": "Yes, the last name is... 
A."}, {"timestamp_ms": 4600, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.6}}, @@ -190,37 +190,37 @@ {"timestamp_ms": 4800, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "KOLTSF", "last_name": "A"}}}, {"timestamp_ms": 4900, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "error", "message": "Last name does not match reservation KOLTSF"}}}, {"timestamp_ms": 5200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 5.2}}, - {"timestamp_ms": 5300, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Okay, I found a reservation for confirmation code Kilo Oscar Lima Tango Sierra Foxtrot, but the last name is not matching. What is the full last name on the ticket, spelled out letter by letter?"}}, + {"timestamp_ms": 5300, "source": "framework", "event_type": "tts_text", "data": {"frame": "Okay, I found a reservation for confirmation code Kilo Oscar Lima Tango Sierra Foxtrot, but the last name is not matching. What is the full last name on the ticket, spelled out letter by letter?"}}, {"timestamp_ms": 5350, "source": "audit_log", "event_type": "assistant", "data": "Okay, I found a reservation for confirmation code Kilo Oscar Lima Tango Sierra Foxtrot, but the last name is not matching. 
What is the full last name on the ticket, spelled out letter by letter?"}, {"timestamp_ms": 6300, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 6.3}}, - {"timestamp_ms": 6400, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 6400, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 6700, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I found a reservation for confirmation code kilo Oscar Lima tango Sierra Foxtrot, but the last name is not matching. What is the full last name on the ticket spelled out letter by letter?"}}}, {"timestamp_ms": 6800, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.8}}, {"timestamp_ms": 6900, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "[slow] The last name is Johansson. \nSpelled [slow] J O H A N S S O N."}}}, - {"timestamp_ms": 7100, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 7100, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 7200, "source": "audit_log", "event_type": "user", "data": "The last name is Johansson. J. H"}, {"timestamp_ms": 7300, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "KOLTSF", "last_name": "Johansson"}}}, {"timestamp_ms": 7400, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "success"}}}, {"timestamp_ms": 7600, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 7.6}}, - {"timestamp_ms": 7700, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Perfect, I have your reservation. You are booked Boston to Seattle on SkyWay flight eight ninety on Wednesday, August twentieth. 
What date would you like to move that return to, and do you want to keep an aisle seat if available?"}}, + {"timestamp_ms": 7700, "source": "framework", "event_type": "tts_text", "data": {"frame": "Perfect, I have your reservation. You are booked Boston to Seattle on SkyWay flight eight ninety on Wednesday, August twentieth. What date would you like to move that return to, and do you want to keep an aisle seat if available?"}}, {"timestamp_ms": 7750, "source": "audit_log", "event_type": "assistant", "data": "Perfect, I have your reservation. You are booked Boston to Seattle on SkyWay flight eight ninety on Wednesday, August twentieth. What date would you like to move that return to, and do you want to keep an aisle seat if available?"}, {"timestamp_ms": 7800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 7.8}}, {"timestamp_ms": 8500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 8.5}}, {"timestamp_ms": 8800, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I have your reservation. You are booked Boston to Seattle on Skyway flight 890 on Wednesday, August 20th. What date would you like to move that return to? And do you want to keep an aisle seat if available?"}}}, {"timestamp_ms": 8900, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 8.9}}, {"timestamp_ms": 9000, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I want to move the return to August twenty third. 
\nAnd yes, aisle seat if you have it."}}}, - {"timestamp_ms": 9100, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 9100, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 9100, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 9100, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 9300, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 9.3}}, {"timestamp_ms": 9400, "source": "audit_log", "event_type": "user", "data": "I want to move the return to August 23rd. And yes, I'll see it if you have it."}, {"timestamp_ms": 9500, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "search_rebooking_options", "parameters": {"origin": "BOS", "destination": "SEA", "date": "2026-08-23", "passenger_count": 1, "fare_class": "main_cabin"}}}, {"timestamp_ms": 9600, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "search_rebooking_options", "response": {"status": "success", "count": 3}}}, {"timestamp_ms": 9800, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 9.8}}, - {"timestamp_ms": 9900, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "On Sunday, August twenty-third, I have three nonstop options from Boston to Seattle: flight nine zero four departing twelve thirty PM, flight nine hundred departing two fifty PM, or flight nine ten departing four twenty PM. Because this is a voluntary change in Main Cabin, there is a seventy-five dollar change fee, plus any fare difference. The twelve thirty PM option is three hundred dollars, which matches what you paid for the return, so it would be seventy-five dollars total. 
Which flight would you like?"}}, + {"timestamp_ms": 9900, "source": "framework", "event_type": "tts_text", "data": {"frame": "On Sunday, August twenty-third, I have three nonstop options from Boston to Seattle: flight nine zero four departing twelve thirty PM, flight nine hundred departing two fifty PM, or flight nine ten departing four twenty PM. Because this is a voluntary change in Main Cabin, there is a seventy-five dollar change fee, plus any fare difference. The twelve thirty PM option is three hundred dollars, which matches what you paid for the return, so it would be seventy-five dollars total. Which flight would you like?"}}, {"timestamp_ms": 9950, "source": "audit_log", "event_type": "assistant", "data": "On Sunday, August twenty-third, I have three nonstop options from Boston to Seattle: flight nine zero four departing twelve thirty PM, flight nine hundred departing two fifty PM, or flight nine ten departing four twenty PM. Because this is a voluntary change in Main Cabin, there is a seventy-five dollar change fee, plus any fare difference. The twelve thirty PM option is three hundred dollars, which matches what you paid for the return, so it would be seventy-five dollars total. Which flight would you like?"}, {"timestamp_ms": 11000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 11.0}}, {"timestamp_ms": 11200, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "On Sunday, August 23rd, I have three nonstop options from Boston to Seattle. Flight 904 departing 1230 p.m., flight 900 departing 250 p.m., or flight 910 departing 420 p.m. Because this is a voluntary change in main cabin. There is a $75 change fee plus any fare difference. The 1230 p.m. option is $300, which matches what you paid for the return, so it would be $75 total. 
Which flight would you like?"}}}, - {"timestamp_ms": 11500, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 11500, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 11700, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} ], "expected": { @@ -260,35 +260,35 @@ "id": "user_interrupts_assistant_with_tool_calls_1_2_1", "description": "Based on record 1.2.1: User interrupts assistant mid-confirmation at turn 2 (user audio_start while assistant audio still open). After interruption, user provides full flight change request, triggering a failed tool call. Assistant asks for last name, then turn 3 succeeds with correct tool calls. Tests interruption detection, skip_next_turn_increment keeping continuation in same turn, [assistant starts replying - user interrupts] prefix, and turn alignment with tool calls.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you today?"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you today?"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.2}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.5}}, {"timestamp_ms": 1600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, how can I help you today?"}}}, {"timestamp_ms": 1700, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 1.7}}, {"timestamp_ms": 1800, "source": "elevenlabs", "event_type": 
"user_speech", "data": {"data": {"text": "I need to change my flight to an earlier one today."}}}, - {"timestamp_ms": 1900, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 2000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1900, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 2000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 2100, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.1}}, {"timestamp_ms": 2200, "source": "audit_log", "event_type": "user", "data": "I need to change my flight to an earlier one today."}, {"timestamp_ms": 2500, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 2.5}}, - {"timestamp_ms": 2600, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Sure, I can help with that. What is your six-character confirmation code and last name?"}}, + {"timestamp_ms": 2600, "source": "framework", "event_type": "tts_text", "data": {"frame": "Sure, I can help with that. What is your six-character confirmation code and last name?"}}, {"timestamp_ms": 2650, "source": "audit_log", "event_type": "assistant", "data": "Sure, I can help with that. What is your six-character confirmation code and last name?"}, {"timestamp_ms": 3200, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 3.2}}, {"timestamp_ms": 3400, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Sure, I can help with that. What is your six-character confirmation code and last name?"}}}, {"timestamp_ms": 3500, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.5}}, {"timestamp_ms": 3600, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Code is six V O R J U. 
Last name Thompson."}}}, - {"timestamp_ms": 3700, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 3800, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 3700, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 3800, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 3900, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.9}}, {"timestamp_ms": 4000, "source": "audit_log", "event_type": "user", "data": "Code is 6. V-O-R-J-U. Last name Thompson."}, {"timestamp_ms": 4200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 4.2}}, - {"timestamp_ms": 4300, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Thanks. Just to confirm, that is the number six, then Victor Oscar Romeo Juliet Uniform, with last name Thompson, correct?"}}, + {"timestamp_ms": 4300, "source": "framework", "event_type": "tts_text", "data": {"frame": "Thanks. Just to confirm, that is the number six, then Victor Oscar Romeo Juliet Uniform, with last name Thompson, correct?"}}, {"timestamp_ms": 4350, "source": "audit_log", "event_type": "assistant", "data": "Thanks. 
Just to confirm, that is the number six, then Victor Oscar Romeo Juliet Uniform, with last name Thompson, correct?"}, {"timestamp_ms": 4500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Thanks."}}}, {"timestamp_ms": 4500, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.5}}, - {"timestamp_ms": 4600, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 4700, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 4600, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 4700, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 4800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 4.8}}, {"timestamp_ms": 5000, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I want to move my LAX to SFO flight today to an earlier direct flight before two PM, if the change fee is under eighty dollars."}}}, {"timestamp_ms": 5500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 5.5}}, @@ -296,14 +296,14 @@ {"timestamp_ms": 5700, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "6VORJU", "last_name": "UNKNOWN"}}}, {"timestamp_ms": 5800, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "error", "message": "Last name does not match"}}}, {"timestamp_ms": 6000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 6.0}}, - {"timestamp_ms": 6100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I can help with that, but I need to verify your identity first. 
What is the last name on the reservation?"}}, + {"timestamp_ms": 6100, "source": "framework", "event_type": "tts_text", "data": {"frame": "I can help with that, but I need to verify your identity first. What is the last name on the reservation?"}}, {"timestamp_ms": 6150, "source": "audit_log", "event_type": "assistant", "data": "I can help with that, but I need to verify your identity first. What is the last name on the reservation?"}, {"timestamp_ms": 6800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 6.8}}, {"timestamp_ms": 7000, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I can help with that, but I need to verify your identity first. What is the last name on the reservation?"}}}, {"timestamp_ms": 7100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 7.1}}, {"timestamp_ms": 7200, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Last name is Thompson."}}}, - {"timestamp_ms": 7300, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 7400, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 7300, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 7400, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 7500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 7.5}}, {"timestamp_ms": 7600, "source": "audit_log", "event_type": "user", "data": "Last name is Thompson."}, {"timestamp_ms": 7700, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "6VORJU", "last_name": "Thompson"}}}, @@ -311,10 +311,10 @@ {"timestamp_ms": 7900, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "search_rebooking_options", "parameters": {"origin": 
"LAX", "destination": "SFO", "date": "2026-06-18", "passenger_count": 1}}}, {"timestamp_ms": 8000, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "search_rebooking_options", "response": {"status": "success", "count": 2}}}, {"timestamp_ms": 8200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 8.2}}, - {"timestamp_ms": 8300, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I found two nonstop options. Flight SK one thirty at one PM for seventy-five dollars total, or flight SK two ten at eleven AM for one hundred twenty-five dollars. Which would you prefer?"}}, + {"timestamp_ms": 8300, "source": "framework", "event_type": "tts_text", "data": {"frame": "I found two nonstop options. Flight SK one thirty at one PM for seventy-five dollars total, or flight SK two ten at eleven AM for one hundred twenty-five dollars. Which would you prefer?"}}, {"timestamp_ms": 8350, "source": "audit_log", "event_type": "assistant", "data": "I found two nonstop options. Flight SK one thirty at one PM for seventy-five dollars total, or flight SK two ten at eleven AM for one hundred twenty-five dollars. Which would you prefer?"}, {"timestamp_ms": 9500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 9.5}}, - {"timestamp_ms": 9600, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 9600, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 9800, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I found two nonstop options. Flight SK 130 at 1 PM for seventy-five dollars total, or flight SK 210 at 11 AM for one hundred twenty-five dollars. 
Which would you prefer?"}}}, {"timestamp_ms": 10000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} ], @@ -354,24 +354,24 @@ "id": "assistant_interrupts_user_1_2_3", "description": "Based on record 1.2.3: Assistant interrupts user at turn 3. User starts spelling confirmation code (audio_start arrives slightly before pipecat turn_start, in the previous turn's window). Assistant audio_start arrives while user audio is still open \u2014 a real interruption where the assistant cuts in mid-speech. Tests that user_audio_started_in_current_turn persists from the previous turn window (no false negative), skip_next_turn_increment keeps continuation in same turn, and [assistant interrupted user] prefix is applied.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you today?"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you today?"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.2}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.5}}, {"timestamp_ms": 1600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, how can I help you today?"}}}, - {"timestamp_ms": 1700, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 1700, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1700, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 1700, "source": "framework", "event_type": "turn_start", "data": {}}, 
{"timestamp_ms": 1710, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 1.71}}, {"timestamp_ms": 1800, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I need to change my flight to an earlier departure."}}}, {"timestamp_ms": 2200, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.2}}, {"timestamp_ms": 2300, "source": "audit_log", "event_type": "user", "data": "I need to change my flight to an earlier departure."}, {"timestamp_ms": 2500, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 2.5}}, - {"timestamp_ms": 2600, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Sure, what is your six-character confirmation code and last name on the booking?"}}, + {"timestamp_ms": 2600, "source": "framework", "event_type": "tts_text", "data": {"frame": "Sure, what is your six-character confirmation code and last name on the booking?"}}, {"timestamp_ms": 2650, "source": "audit_log", "event_type": "assistant", "data": "Sure, what is your six-character confirmation code and last name on the booking?"}, {"timestamp_ms": 3200, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 3.2}}, {"timestamp_ms": 3400, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Sure, what is your six-character confirmation code and last name on the booking?"}}}, - {"timestamp_ms": 3500, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 3500, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 3500, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 3500, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 3510, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": 
"elevenlabs_user", "audio_timestamp": 3.51}}, {"timestamp_ms": 3600, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "The confirmation code is X X F six O H and the last name is Kim."}}}, {"timestamp_ms": 4200, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.2}}, @@ -379,26 +379,26 @@ {"timestamp_ms": 4400, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "XXF60H", "last_name": "Kim"}}}, {"timestamp_ms": 4500, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "error", "message": "Reservation XXF60H not found"}}}, {"timestamp_ms": 4800, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 4.8}}, - {"timestamp_ms": 4900, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I was not able to find that code. Could you spell it out one letter at a time?"}}, + {"timestamp_ms": 4900, "source": "framework", "event_type": "tts_text", "data": {"frame": "I was not able to find that code. Could you spell it out one letter at a time?"}}, {"timestamp_ms": 4950, "source": "audit_log", "event_type": "assistant", "data": "I was not able to find that code. Could you spell it out one letter at a time?"}, {"timestamp_ms": 5800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 5.8}}, {"timestamp_ms": 6000, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I was not able to find that code. 
Could you spell it out one letter at a time?"}}}, {"timestamp_ms": 6010, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.01}}, - {"timestamp_ms": 6100, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 6100, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 6100, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 6100, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 6200, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "[slow] It is X X F six O H, with the letter O not zero."}}}, {"timestamp_ms": 6300, "source": "audit_log", "event_type": "user", "data": "It is..."}, {"timestamp_ms": 6500, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 6.5}}, - {"timestamp_ms": 6600, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I'm here. Take your time. Please say the six characters one at a time and I will confirm each one."}}, + {"timestamp_ms": 6600, "source": "framework", "event_type": "tts_text", "data": {"frame": "I'm here. Take your time. Please say the six characters one at a time and I will confirm each one."}}, {"timestamp_ms": 6650, "source": "audit_log", "event_type": "assistant", "data": "I'm here. Take your time. 
Please say the six characters one at a time and I will confirm each one."}, {"timestamp_ms": 7200, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 7.2}}, - {"timestamp_ms": 7500, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 7500, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 7500, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 7500, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 7800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 7.8}}, {"timestamp_ms": 7900, "source": "audit_log", "event_type": "user", "data": "X. F. 6. O. H. with the letter O. not zero."}, {"timestamp_ms": 8000, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I'm here. Take your time. Please say the six characters one at a time and I will confirm each one."}}}, {"timestamp_ms": 8200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 8.2}}, - {"timestamp_ms": 8300, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Got it. So I have X, F, six, O, H. Let me try that with last name Kim."}}, + {"timestamp_ms": 8300, "source": "framework", "event_type": "tts_text", "data": {"frame": "Got it. So I have X, F, six, O, H. Let me try that with last name Kim."}}, {"timestamp_ms": 8350, "source": "audit_log", "event_type": "assistant", "data": "Got it. So I have X, F, six, O, H. 
Let me try that with last name Kim."}, {"timestamp_ms": 8400, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "XXF6OH", "last_name": "Kim"}}}, {"timestamp_ms": 8500, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "error", "message": "Reservation XXF6OH not found"}}}, @@ -406,7 +406,7 @@ {"timestamp_ms": 9200, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Got it. So I have X, F, six, O, H. Let me try that with last name Kim."}}}, {"timestamp_ms": 9300, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 9.3}}, {"timestamp_ms": 9400, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "No, that is not right. It is X X F six O H."}}}, - {"timestamp_ms": 9500, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 9500, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 9800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 9.8}}, {"timestamp_ms": 10000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} ], @@ -445,36 +445,36 @@ "id": "realtime_equal_timestamps", "description": "Realtime S2S pipeline: turn_end, turn_start, and audit_log user entry share the same timestamp at turn boundaries. Tests that the sort key (turn_end=0, turn_start=1, other=2) correctly orders events so user entries are assigned to the new turn. Includes a tool call turn.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello! 
How can I help you today?"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Hello! How can I help you today?"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.2}}, {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 2.0}}, {"timestamp_ms": 2500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, how can I help you today?"}}}, - {"timestamp_ms": 3000, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 3000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 3000, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 3000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 3000, "source": "audit_log", "event_type": "user", "data": "Hi, I need to change my flight to March 25th."}, {"timestamp_ms": 3100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.1}}, {"timestamp_ms": 3200, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Hi, I need to change my flight to March twenty fifth."}}}, {"timestamp_ms": 3800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.8}}, - {"timestamp_ms": 4000, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Sure, I can help with that. Could you provide your confirmation number and last name?"}}, + {"timestamp_ms": 4000, "source": "framework", "event_type": "tts_text", "data": {"frame": "Sure, I can help with that. 
Could you provide your confirmation number and last name?"}}, {"timestamp_ms": 4050, "source": "audit_log", "event_type": "assistant", "data": "Sure, I can help with that. Could you provide your confirmation number and last name?"}, {"timestamp_ms": 4100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 4.1}}, {"timestamp_ms": 5000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 5.0}}, {"timestamp_ms": 5500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Sure, I can help with that. Could you provide your confirmation number and last name?"}}}, - {"timestamp_ms": 6000, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 6000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 6000, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 6000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 6000, "source": "audit_log", "event_type": "user", "data": "Yeah, it is VK3FFW, and my last name is Rodriguez."}, {"timestamp_ms": 6100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.1}}, {"timestamp_ms": 6200, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Yeah, it is Z K three F F W, and my last name is Rodriguez."}}}, {"timestamp_ms": 6800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.8}}, {"timestamp_ms": 7000, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "VK3FFW", "last_name": "Rodriguez"}}}, {"timestamp_ms": 7000, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "error", "message": "Reservation VK3FFW not found"}}}, 
- {"timestamp_ms": 7500, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "It seems I couldn't find the reservation. Could you double-check the confirmation number?"}}, + {"timestamp_ms": 7500, "source": "framework", "event_type": "tts_text", "data": {"frame": "It seems I couldn't find the reservation. Could you double-check the confirmation number?"}}, {"timestamp_ms": 7550, "source": "audit_log", "event_type": "assistant", "data": "It seems I couldn't find the reservation. Could you double-check the confirmation number?"}, {"timestamp_ms": 7600, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 7.6}}, {"timestamp_ms": 8500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 8.5}}, {"timestamp_ms": 9000, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "It seems I couldn't find the reservation. Could you double-check the confirmation number?"}}}, - {"timestamp_ms": 9500, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 9500, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 10000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} ], "expected": { @@ -506,25 +506,25 @@ "description": "S2S pipeline (llm_response): greeting at turn 0, user reply at turn 1, assistant speak\u2192tool_call\u2192speak at turn 2. Tests that: (1) user entries use ElevenLabs intended text, (2) assistant entries come from audit_log with proper tool call boundaries, (3) greeting is moved to front when ElevenLabs timestamps arrive before audit_log.", "is_audio_native": true, "history": [ - {"timestamp_ms": 4784, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 7000, "source": "pipecat", "event_type": "llm_response", "data": {"frame": "Hello, thank you for calling SkyWay Airlines. 
How can I help you?"}}, + {"timestamp_ms": 4784, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 7000, "source": "framework", "event_type": "llm_response", "data": {"frame": "Hello, thank you for calling SkyWay Airlines. How can I help you?"}}, {"timestamp_ms": 7188, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 7.188}}, {"timestamp_ms": 13227, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 13.227}}, {"timestamp_ms": 14602, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, thank you for calling Skyway Airlines. How can I help you?"}}}, {"timestamp_ms": 15013, "source": "audit_log", "event_type": "assistant", "data": "Hello, thank you for calling SkyWay Airlines. How can I help you?"}, - {"timestamp_ms": 15412, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 15412, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 15412, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 15412, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 14608, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 14.608}}, {"timestamp_ms": 14832, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I need to change my flight to March 25th."}}}, {"timestamp_ms": 18028, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 18.028}}, {"timestamp_ms": 15414, "source": "audit_log", "event_type": "user", "data": "Hi. I need to change my flight to March 25."}, - {"timestamp_ms": 19000, "source": "pipecat", "event_type": "llm_response", "data": {"frame": "Sure! 
Could you provide your confirmation number and last name?"}}, + {"timestamp_ms": 19000, "source": "framework", "event_type": "llm_response", "data": {"frame": "Sure! Could you provide your confirmation number and last name?"}}, {"timestamp_ms": 19590, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 19.59}}, {"timestamp_ms": 30175, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 30.175}}, {"timestamp_ms": 32912, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Sure! Could you provide your confirmation number and last name?"}}}, {"timestamp_ms": 32044, "source": "audit_log", "event_type": "assistant", "data": "Sure! Could you provide your confirmation number and last name?"}, - {"timestamp_ms": 34059, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 34059, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 34059, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 34059, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 32916, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 32.916}}, {"timestamp_ms": 33960, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Confirmation is ZK3FFW, last name Rodriguez."}}}, {"timestamp_ms": 40596, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 40.596}}, @@ -535,12 +535,12 @@ {"timestamp_ms": 66393, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "search_flights", "parameters": {"origin": "AUS", "destination": "ORD", "date": "2026-03-25"}}}, {"timestamp_ms": 66393, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "search_flights", "response": {"status": "success", "count": 3}}}, 
{"timestamp_ms": 66396, "source": "audit_log", "event_type": "assistant", "data": "I found your reservation. You are booked on flight SK621 from Austin to Chicago."}, - {"timestamp_ms": 41000, "source": "pipecat", "event_type": "llm_response", "data": {"frame": "Thank you! Let me pull up your booking.\nI found your reservation. You are booked on flight SK621 from Austin to Chicago."}}, + {"timestamp_ms": 41000, "source": "framework", "event_type": "llm_response", "data": {"frame": "Thank you! Let me pull up your booking.\nI found your reservation. You are booked on flight SK621 from Austin to Chicago."}}, {"timestamp_ms": 41880, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 41.88}}, {"timestamp_ms": 64809, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 64.809}}, {"timestamp_ms": 69138, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Thank you. Let me pull up your booking. I found your reservation. You are booked on flight SK 621 from Austin to Chicago."}}}, - {"timestamp_ms": 70314, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 70314, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 70314, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 70314, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 69150, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 69.15}}, {"timestamp_ms": 69751, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "That is not right. 
I am Austin to LAX."}}}, {"timestamp_ms": 76172, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 76.172}}, @@ -576,18 +576,18 @@ "id": "assistant_interrupts_short_fragment", "description": "Assistant interrupts user and gets cut off after a few words. Tests that the post-loop validation strips interruption labels before comparing audit_log text against pipecat text, so short fragments like '[assistant interrupts] I'd be [speaker likely cut itself off]' are not filtered out.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.5}}, {"timestamp_ms": 1600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, how can I help you?"}}}, - {"timestamp_ms": 1700, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 1700, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1700, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 1700, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 1710, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 1.71}}, {"timestamp_ms": 1800, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I need help with my 
reservation and I also need to"}}}, {"timestamp_ms": 2200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 2.2}}, {"timestamp_ms": 2300, "source": "audit_log", "event_type": "user", "data": "I need help with my reservation and I also need to"}, - {"timestamp_ms": 2400, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I'd be happy to help you with your reservation."}}, + {"timestamp_ms": 2400, "source": "framework", "event_type": "tts_text", "data": {"frame": "I'd be happy to help you with your reservation."}}, {"timestamp_ms": 2450, "source": "audit_log", "event_type": "assistant", "data": "I'd be"}, {"timestamp_ms": 2500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 2.5}}, {"timestamp_ms": 2600, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.6}}, @@ -595,12 +595,12 @@ {"timestamp_ms": 2800, "source": "audit_log", "event_type": "user", "data": "change my flight date."}, {"timestamp_ms": 2900, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I'd be"}}}, {"timestamp_ms": 3000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 3.0}}, - {"timestamp_ms": 3100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Sure, what is your confirmation code?"}}, + {"timestamp_ms": 3100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Sure, what is your confirmation code?"}}, {"timestamp_ms": 3150, "source": "audit_log", "event_type": "assistant", "data": "Sure, what is your confirmation code?"}, {"timestamp_ms": 3500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 3.5}}, {"timestamp_ms": 3600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Sure, what is your confirmation 
code?"}}}, - {"timestamp_ms": 3700, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 3700, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 3700, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 3700, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 3800, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.8}}, {"timestamp_ms": 3900, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "It is ABC123."}}}, {"timestamp_ms": 4000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.0}}, @@ -631,39 +631,39 @@ "id": "late_arriving_elevenlabs_transcripts", "description": "Based on record 1.1.4/trial_2 turns 2-4. ElevenLabs delivers assistant_speech and user_speech transcripts AFTER audio_start(elevenlabs_user) has already advanced the turn. 
Tests that (1) late assistant_speech lands at the turn where assistant audio started, not the current turn, (2) late assistant_speech does not cause a spurious turn advance, (3) user_speech lands at the turn where user audio started, and (4) user_speech arriving before the first audio_start(elevenlabs_user) is buffered and replayed at the correct turn.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.2}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.5}}, {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, how can I help you?"}}}, {"timestamp_ms": 2010, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I need to rebook my flight."}}}, {"timestamp_ms": 2020, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.0}}, - {"timestamp_ms": 2100, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 2100, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 2100, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 2100, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 2300, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.3}}, {"timestamp_ms": 2500, "source": 
"audit_log", "event_type": "user", "data": "I need to rebook my flight."}, {"timestamp_ms": 3000, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "ABC123", "last_name": "Smith"}}}, {"timestamp_ms": 3100, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "error", "message": "Not found"}}}, {"timestamp_ms": 3500, "source": "audit_log", "event_type": "assistant", "data": "I couldn't find that reservation. Could you double-check?"}, {"timestamp_ms": 3600, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 3.6}}, - {"timestamp_ms": 3700, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I couldn't find that reservation. Could you double-check?"}}, + {"timestamp_ms": 3700, "source": "framework", "event_type": "tts_text", "data": {"frame": "I couldn't find that reservation. Could you double-check?"}}, {"timestamp_ms": 5000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 5.0}}, {"timestamp_ms": 5500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I couldn't find that reservation. 
Could you double-check?"}}}, {"timestamp_ms": 5510, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 5.5}}, - {"timestamp_ms": 5600, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 5600, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 5600, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 5600, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 5700, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "It is K as in kilo, O as in oscar, L as in lima."}}}, {"timestamp_ms": 6000, "source": "audit_log", "event_type": "user", "data": "K as in kilo O as in Oscar"}, {"timestamp_ms": 6500, "source": "audit_log", "event_type": "user", "data": "L as in Lima"}, {"timestamp_ms": 7000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 7.0}}, {"timestamp_ms": 8000, "source": "audit_log", "event_type": "assistant", "data": "I'm sorry, could you spell that more slowly?"}, {"timestamp_ms": 8100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 8.1}}, - {"timestamp_ms": 8200, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I'm sorry, could you spell that more slowly?"}}, + {"timestamp_ms": 8200, "source": "framework", "event_type": "tts_text", "data": {"frame": "I'm sorry, could you spell that more slowly?"}}, {"timestamp_ms": 9000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 9.0}}, {"timestamp_ms": 9500, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 9.5}}, {"timestamp_ms": 9600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I'm sorry, could you spell that more slowly?"}}}, - 
{"timestamp_ms": 9700, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 9700, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 9700, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 9700, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 10000, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Sure. K as in kilo, O as in oscar, L as in lima."}}}, {"timestamp_ms": 10500, "source": "audit_log", "event_type": "user", "data": "K O L"}, {"timestamp_ms": 11000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 11.0}}, @@ -698,37 +698,37 @@ "id": "user_speech_buffered_after_assistant_response", "description": "Based on record 1.1.5/trial_0 turns 8-9. ElevenLabs delivers user_speech for the next turn BEFORE audio_start(elevenlabs_user), but AFTER the assistant has responded to the previous turn. Tests that user_speech is buffered and replayed at the correct turn (9) instead of being appended to the previous turn (8). 
The key difference from a late transcript is that the assistant responded in between, so this is a new speaking session.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.2}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.5}}, {"timestamp_ms": 1600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, how can I help you?"}}}, {"timestamp_ms": 1700, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 1.7}}, {"timestamp_ms": 1800, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I want to change both flights."}}}, - {"timestamp_ms": 1900, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 1900, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1900, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 1900, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 2100, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.1}}, {"timestamp_ms": 2200, "source": "audit_log", "event_type": "user", "data": "I want to change both flights."}, {"timestamp_ms": 2500, "source": "audit_log", "event_type": "assistant", "data": "I found options for both flights. 
Which would you prefer?"}, {"timestamp_ms": 2600, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 2.6}}, - {"timestamp_ms": 2700, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I found options for both flights. Which would you prefer?"}}, + {"timestamp_ms": 2700, "source": "framework", "event_type": "tts_text", "data": {"frame": "I found options for both flights. Which would you prefer?"}}, {"timestamp_ms": 4000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 4.0}}, {"timestamp_ms": 4200, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I found options for both flights. Which would you prefer?"}}}, {"timestamp_ms": 4300, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.3}}, {"timestamp_ms": 4400, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "That works, main cabin please."}}}, - {"timestamp_ms": 4500, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 4500, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 4500, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 4500, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 4700, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.7}}, {"timestamp_ms": 4800, "source": "audit_log", "event_type": "user", "data": "That works. Main cabin."}, {"timestamp_ms": 5200, "source": "audit_log", "event_type": "assistant", "data": "Just to confirm before I make the changes: flight one on November third, flight two on November fifth. 
Shall I proceed?"}, {"timestamp_ms": 5300, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 5.3}}, - {"timestamp_ms": 5400, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Just to confirm before I make the changes: flight one on November third, flight two on November fifth. Shall I proceed?"}}, + {"timestamp_ms": 5400, "source": "framework", "event_type": "tts_text", "data": {"frame": "Just to confirm before I make the changes: flight one on November third, flight two on November fifth. Shall I proceed?"}}, {"timestamp_ms": 7000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 7.0}}, - {"timestamp_ms": 7100, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 7100, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 7500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Just to confirm, flight one on November third, flight two on November fifth. 
Shall I proceed?"}}}, {"timestamp_ms": 7600, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Yes, go ahead and rebook both."}}}, {"timestamp_ms": 7700, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 7.7}}, - {"timestamp_ms": 7800, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 7800, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 8000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 8.0}}, {"timestamp_ms": 8200, "source": "audit_log", "event_type": "user", "data": "Yes, go ahead."}, {"timestamp_ms": 9000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} @@ -757,23 +757,23 @@ "id": "cascade_audit_truncated_to_spoken", "description": "Cascade pipeline: audit_log/assistant has the full LLM response but TTS only received a prefix before the conversation ended. 
Tests that _truncate_to_spoken truncates audit_log text to the portion matching tts_text in the conversation_trace.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Welcome!"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Welcome!"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Welcome!"}}}, {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 2.0}}, - {"timestamp_ms": 2100, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 3000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 2100, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 3000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 3100, "source": "audit_log", "event_type": "user", "data": "I need to rebook my flight"}, {"timestamp_ms": 3200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.0}}, {"timestamp_ms": 3500, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I need to rebook my flight"}}}, {"timestamp_ms": 3800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.8}}, - {"timestamp_ms": 4000, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Sure, I can help you with that."}}, + {"timestamp_ms": 4000, "source": "framework", "event_type": "tts_text", "data": {"frame": "Sure, 
I can help you with that."}}, {"timestamp_ms": 4050, "source": "audit_log", "event_type": "assistant", "data": "Sure, I can help you with that. Could you please provide your confirmation number and last name?"}, {"timestamp_ms": 4100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 4.0}}, {"timestamp_ms": 4500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Sure, I can help you with that."}}}, {"timestamp_ms": 5000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 5.0}}, - {"timestamp_ms": 5100, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 5100, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 6000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} ], "expected": { @@ -800,24 +800,24 @@ "description": "S2S pipeline (llm_response): audit_log/assistant has the full LLM response but llm_response only covers the spoken prefix. 
Tests that _truncate_to_spoken truncates audit_log text for S2S conversations.", "is_audio_native": true, "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "llm_response", "data": {"frame": "Welcome to SkyWay Airlines!"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "llm_response", "data": {"frame": "Welcome to SkyWay Airlines!"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, {"timestamp_ms": 1800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.8}}, {"timestamp_ms": 2000, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Welcome to SkyWay Airlines!"}}}, {"timestamp_ms": 2100, "source": "audit_log", "event_type": "assistant", "data": "Welcome to SkyWay Airlines!"}, - {"timestamp_ms": 2200, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 2200, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 2200, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 2200, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 2300, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.3}}, {"timestamp_ms": 2500, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I need to change my flight date."}}}, {"timestamp_ms": 3000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.0}}, {"timestamp_ms": 3100, "source": "audit_log", "event_type": "user", "data": "I need to change my flight date."}, - {"timestamp_ms": 3500, "source": "pipecat", 
"event_type": "llm_response", "data": {"frame": "Sure, I can help you change your flight."}}, + {"timestamp_ms": 3500, "source": "framework", "event_type": "llm_response", "data": {"frame": "Sure, I can help you change your flight."}}, {"timestamp_ms": 3550, "source": "audit_log", "event_type": "assistant", "data": "Sure, I can help you change your flight. What is your confirmation number and last name so I can pull up the reservation?"}, {"timestamp_ms": 3600, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 3.6}}, {"timestamp_ms": 4200, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Sure, I can help you change your flight."}}}, {"timestamp_ms": 4500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 4.5}}, - {"timestamp_ms": 4600, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 4600, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 5000, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} ], "expected": { @@ -843,48 +843,48 @@ "id": "empty_user_audio_session_no_turn_advance", "description": "Short user audio burst with no user_speech should not create a new turn. Late audit_log/user STT chunk from the previous session stays at the same turn. 
Turn 2 user_speech comes from the next real session.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "turn_start", "data": {}}, - {"timestamp_ms": 1100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Welcome to SkyWay Airlines!"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1100, "source": "framework", "event_type": "tts_text", "data": {"frame": "Welcome to SkyWay Airlines!"}}, {"timestamp_ms": 1200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Welcome to SkyWay Airlines!"}}}, {"timestamp_ms": 1800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.8}}, - {"timestamp_ms": 1900, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 2000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 1900, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 2000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 2100, "source": "audit_log", "event_type": "user", "data": "Hi, I'm calling"}, {"timestamp_ms": 2200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.0}}, {"timestamp_ms": 2500, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Hi, I'm calling about a reservation."}}}, {"timestamp_ms": 3000, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 3.0}}, {"timestamp_ms": 3100, "source": "audit_log", "event_type": "user", "data": "Sure, confirmation number is N53W23"}, - {"timestamp_ms": 3200, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Could you please provide 
your last name?"}}, + {"timestamp_ms": 3200, "source": "framework", "event_type": "tts_text", "data": {"frame": "Could you please provide your last name?"}}, {"timestamp_ms": 3250, "source": "audit_log", "event_type": "assistant", "data": "Could you please provide your last name?"}, {"timestamp_ms": 3300, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 3.2}}, {"timestamp_ms": 3600, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Could you please provide your last name?"}}}, {"timestamp_ms": 3900, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 3.8}}, - {"timestamp_ms": 3950, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 4000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 3950, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 4000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 4100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.0}}, {"timestamp_ms": 4500, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 4.5}}, - {"timestamp_ms": 5000, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 5000, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 5050, "source": "audit_log", "event_type": "user", "data": "My name is Cruz."}, {"timestamp_ms": 5200, "source": "audit_log", "event_type": "tool_call", "data": {"tool": "get_reservation", "parameters": {"confirmation_number": "N53W23", "last_name": "Cruz"}}}, {"timestamp_ms": 5200, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_reservation", "response": {"status": "success"}}}, {"timestamp_ms": 5500, "source": "audit_log", "event_type": 
"assistant", "data": "I found your reservation for Natalie Cruz."}, - {"timestamp_ms": 5600, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I found your reservation for Natalie Cruz."}}, + {"timestamp_ms": 5600, "source": "framework", "event_type": "tts_text", "data": {"frame": "I found your reservation for Natalie Cruz."}}, {"timestamp_ms": 5700, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 5.6}}, {"timestamp_ms": 6000, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I found your reservation for Natalie Cruz."}}}, {"timestamp_ms": 6300, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 6.2}}, - {"timestamp_ms": 6400, "source": "pipecat", "event_type": "turn_end", "data": {}}, - {"timestamp_ms": 6500, "source": "pipecat", "event_type": "turn_start", "data": {}}, + {"timestamp_ms": 6400, "source": "framework", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 6500, "source": "framework", "event_type": "turn_start", "data": {}}, {"timestamp_ms": 6600, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.5}}, {"timestamp_ms": 6900, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Yes, that is the right reservation."}}}, {"timestamp_ms": 7200, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 7.1}}, {"timestamp_ms": 7300, "source": "audit_log", "event_type": "user", "data": "Yes, that is the right reservation."}, - {"timestamp_ms": 7500, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Great, is there anything else I can help with?"}}, + {"timestamp_ms": 7500, "source": "framework", "event_type": "tts_text", "data": {"frame": "Great, is there anything else I can help with?"}}, {"timestamp_ms": 7550, "source": "audit_log", "event_type": 
"assistant", "data": "Great, is there anything else I can help with?"}, {"timestamp_ms": 7600, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 7.5}}, {"timestamp_ms": 7900, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Great, is there anything else I can help with?"}}}, {"timestamp_ms": 8200, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 8.1}}, - {"timestamp_ms": 8300, "source": "pipecat", "event_type": "turn_end", "data": {}}, + {"timestamp_ms": 8300, "source": "framework", "event_type": "turn_end", "data": {}}, {"timestamp_ms": 8500, "source": "elevenlabs", "event_type": "connection_state", "data": {"data": {"state": "session_ended"}}} ], "expected": { @@ -916,7 +916,7 @@ "id": "double_empty_user_audio_sessions_with_interruption", "description": "Two consecutive empty user audio bursts (ElevenLabs misses speech that Deepgram catches), followed by the user interrupting the assistant's response. The audit_log/user transcript and ElevenLabs user_speech must land at the same turn. 
Based on record 7.3.1.", "history": [ - {"timestamp_ms": 1000, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, + {"timestamp_ms": 1000, "source": "framework", "event_type": "tts_text", "data": {"frame": "Hello, how can I help you?"}}, {"timestamp_ms": 1100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 1.0}}, {"timestamp_ms": 1500, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Hello, how can I help you?"}}}, {"timestamp_ms": 1800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 1.8}}, @@ -924,7 +924,7 @@ {"timestamp_ms": 2300, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "I need help rebooking my flight."}}}, {"timestamp_ms": 2800, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 2.8}}, {"timestamp_ms": 2900, "source": "audit_log", "event_type": "user", "data": "I need help rebooking my flight."}, - {"timestamp_ms": 3000, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "Sure. Was your flight canceled or delayed?"}}, + {"timestamp_ms": 3000, "source": "framework", "event_type": "tts_text", "data": {"frame": "Sure. Was your flight canceled or delayed?"}}, {"timestamp_ms": 3050, "source": "audit_log", "event_type": "assistant", "data": "Sure. Was your flight canceled or delayed?"}, {"timestamp_ms": 3100, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 3.0}}, {"timestamp_ms": 3400, "source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "Sure. 
Was your flight canceled or delayed?"}}}, @@ -938,13 +938,13 @@ {"timestamp_ms": 5600, "source": "audit_log", "event_type": "tool_response", "data": {"tool": "get_disruption_info", "response": {"status": "error", "error_type": "not_found"}}}, {"timestamp_ms": 5900, "source": "audit_log", "event_type": "assistant", "data": "I can not find that flight. Let me transfer you to an agent."}, {"timestamp_ms": 6000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 5.9}}, - {"timestamp_ms": 6100, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I can not find that flight."}}, + {"timestamp_ms": 6100, "source": "framework", "event_type": "tts_text", "data": {"frame": "I can not find that flight."}}, {"timestamp_ms": 6200, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.2}}, {"timestamp_ms": 6300, "source": "elevenlabs", "event_type": "user_speech", "data": {"data": {"text": "Yeah, my flight was canceled and I need a hotel voucher."}}}, {"timestamp_ms": 6400, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "pipecat_agent", "audio_timestamp": 6.4}}, {"timestamp_ms": 6500, "source": "audit_log", "event_type": "user", "data": "And I need a hotel voucher."}, {"timestamp_ms": 6700, "source": "elevenlabs", "event_type": "audio_end", "data": {"user": "elevenlabs_user", "audio_timestamp": 6.7}}, - {"timestamp_ms": 6900, "source": "pipecat", "event_type": "tts_text", "data": {"frame": "I'm sorry, I encountered an error."}}, + {"timestamp_ms": 6900, "source": "framework", "event_type": "tts_text", "data": {"frame": "I'm sorry, I encountered an error."}}, {"timestamp_ms": 6950, "source": "audit_log", "event_type": "assistant", "data": "I'm sorry, I encountered an error."}, {"timestamp_ms": 7000, "source": "elevenlabs", "event_type": "audio_start", "data": {"user": "pipecat_agent", "audio_timestamp": 6.9}}, {"timestamp_ms": 7300, 
"source": "elevenlabs", "event_type": "assistant_speech", "data": {"data": {"text": "I can not find that flight. I'm sorry, I encountered an error."}}}, diff --git a/tests/integration/test_metrics_end_to_end.py b/tests/integration/test_metrics_end_to_end.py index fb3d45ac..b1bb5c63 100644 --- a/tests/integration/test_metrics_end_to_end.py +++ b/tests/integration/test_metrics_end_to_end.py @@ -54,7 +54,7 @@ def mock_run_dir(tmp_path): # Rewrite result.json with corrected paths (originals point to the run output dir) result_data = json.loads((record_dir / "result.json").read_text()) result_data["output_dir"] = str(record_dir) - result_data["pipecat_logs_path"] = str(record_dir / "pipecat_logs.jsonl") + result_data["framework_logs_path"] = str(record_dir / "framework_logs.jsonl") result_data["elevenlabs_logs_path"] = str(record_dir / "elevenlabs_events.jsonl") # Fix audio paths if present for audio_key in ("audio_mixed_path", "audio_assistant_path", "audio_user_path"): diff --git a/tests/integration/test_processor_real_artifacts.py b/tests/integration/test_processor_real_artifacts.py index 84b7d20f..c0571727 100644 --- a/tests/integration/test_processor_real_artifacts.py +++ b/tests/integration/test_processor_real_artifacts.py @@ -48,7 +48,7 @@ def processor_context() -> _ProcessorContext: result_data = json.loads((ARTIFACTS_DIR / "result.json").read_text()) # Fix paths to point to the local artifacts directory result_data["output_dir"] = str(ARTIFACTS_DIR) - result_data["pipecat_logs_path"] = str(ARTIFACTS_DIR / "pipecat_logs.jsonl") + result_data["framework_logs_path"] = str(ARTIFACTS_DIR / "framework_logs.jsonl") result_data["elevenlabs_logs_path"] = str(ARTIFACTS_DIR / "elevenlabs_events.jsonl") result_data["audio_mixed_path"] = str(ARTIFACTS_DIR / "audio_mixed.wav") result_data["audio_assistant_path"] = None diff --git a/tests/unit/assistant/test_server.py b/tests/unit/assistant/test_server.py index 6b2a9f14..8e355217 100644 --- 
a/tests/unit/assistant/test_server.py +++ b/tests/unit/assistant/test_server.py @@ -9,12 +9,12 @@ import pytest from eva.assistant.agentic.audit_log import AuditLog -from eva.assistant.server import SAMPLE_RATE, AssistantServer +from eva.assistant.server import SAMPLE_RATE, PipecatAssistantServer def _make_server(tmp_path: Path): - """Build a lightweight AssistantServer without invoking __init__ (avoids Pipecat I/O).""" + """Build a lightweight PipecatAssistantServer without invoking __init__ (avoids Pipecat I/O).""" - srv = object.__new__(AssistantServer) + srv = object.__new__(PipecatAssistantServer) srv.output_dir = tmp_path srv.audit_log = AuditLog() srv.agentic_system = None diff --git a/tests/unit/utils/test_conversation_checks.py b/tests/unit/utils/test_conversation_checks.py index 4a542393..22aa2e72 100644 --- a/tests/unit/utils/test_conversation_checks.py +++ b/tests/unit/utils/test_conversation_checks.py @@ -81,17 +81,17 @@ def test_check_conversation_finished_no_details(record_dir): assert check_conversation_finished(record_dir) is False -def _write_pipecat_logs(output_dir, record_id, entries): - """Helper to write pipecat_logs.jsonl for a record.""" +def _write_framework_logs(output_dir, record_id, entries): + """Helper to write framework_logs.jsonl for a record.""" record_path = output_dir / "records" / record_id record_path.mkdir(parents=True, exist_ok=True) - logs_path = record_path / "pipecat_logs.jsonl" + logs_path = record_path / "framework_logs.jsonl" logs_path.write_text("\n".join(json.dumps(e) for e in entries) + "\n") def test_find_records_with_llm_generic_error_detects_error(temp_dir): """Test that records containing the generic LLM error are detected.""" - _write_pipecat_logs( + _write_framework_logs( temp_dir, "1.1.1", [ @@ -99,7 +99,7 @@ def test_find_records_with_llm_generic_error_detects_error(temp_dir): {"type": "llm_response", "data": {"frame": "I'm sorry, I encountered an error processing your request."}}, ], ) - _write_pipecat_logs( + _write_framework_logs( temp_dir, "1.1.2", [ @@ -113,14 +113,14 @@ def 
test_find_records_with_llm_generic_error_detects_error(temp_dir): def test_find_records_with_llm_generic_error_no_errors(temp_dir): """Test that no records are returned when there are no generic LLM errors.""" - _write_pipecat_logs( + _write_framework_logs( temp_dir, "1.1.1", [ {"type": "llm_response", "data": {"frame": "Hello, how can I help you?"}}, ], ) - _write_pipecat_logs( + _write_framework_logs( temp_dir, "1.1.2", [ @@ -133,6 +133,6 @@ def test_find_records_with_llm_generic_error_no_errors(temp_dir): def test_find_records_with_llm_generic_error_missing_logs(temp_dir): - """Test that records without pipecat_logs.jsonl are skipped.""" + """Test that records without framework_logs.jsonl are skipped.""" result = find_records_with_llm_generic_error(temp_dir, ["nonexistent"]) assert result == []