Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,15 @@ license = "MIT"
dependencies = [
"pydantic>=2.0",
"pydantic-settings>=2.0",
"pipecat-ai==0.0.104",
"pipecat-ai>=0.0.108",
"elevenlabs>=1.0.0",
"openai>=1.0.0",
"anthropic>=0.83.0",
"litellm>=1.30.0",
"deepgram-sdk>=3.5.0,<4.0.0",
"deepgram-sdk>=6.1.1,<7",
"onnxruntime>=1.16.0",
"aioboto3>=12.0.0",
"google-generativeai>=0.3.0",
"google-genai>=0.3.0",
"google-genai>=1.69.0",
"google-cloud-speech>=2.0.0",
"google-cloud-texttospeech>=2.0.0",
"azure-cognitiveservices-speech>=1.31.0",
Expand All @@ -54,7 +53,6 @@ dependencies = [
"jaconv>=0.3.0",
"regex>=2023.0.0",
"more-itertools>=10.0.0",
"nvidia-riva-client>=2.25.0,<2.25.1"
]

[project.optional-dependencies]
Expand All @@ -73,6 +71,7 @@ apps = [
"streamlit-diff-viewer>=0.0.2",
]


[project.scripts]
eva = "eva.cli:main"

Expand Down
4 changes: 2 additions & 2 deletions src/eva/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
End-to-end evaluation framework for voice assistants using Pipecat and ElevenLabs.
"""

__version__ = "0.1.0"
__version__ = "0.1.2"

# Bump simulation_version when changes affect benchmark outputs (agent code,
# user simulator, orchestrator, simulation prompts, agent configs, tool mocks).
simulation_version = "0.1.0"
simulation_version = "0.1.2"

# Bump metrics_version when changes affect metric computation (metrics code,
# judge prompts, pricing tables, postprocessor).
Expand Down
14 changes: 14 additions & 0 deletions src/eva/assistant/agentic/audit_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import json
import time
from datetime import datetime
from enum import StrEnum
from pathlib import Path
from typing import Any, Optional
Expand All @@ -18,6 +19,19 @@ def current_timestamp_ms() -> str:
return str(int(round(time.time() * 1000)))


def convert_to_epoch_ms(timestamp: str) -> str:
"""Convert a timestamp to epoch milliseconds string.

If the timestamp is already in epoch milliseconds format (all digits),
return it as-is. Otherwise, parse as ISO 8601 and convert.
"""
if timestamp.isdigit():
return timestamp

dt = datetime.fromisoformat(timestamp)
return str(int(dt.timestamp() * 1000))


class MessageRole(StrEnum):
"""Message roles in a conversation."""

Expand Down
35 changes: 28 additions & 7 deletions src/eva/assistant/pipeline/nvidia_baseten.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
import riva.client
from __future__ import annotations

from pipecat.frames.frames import CancelFrame, EndFrame
from pipecat.services.nvidia.stt import NvidiaSTTService
from pipecat.services.nvidia.tts import NvidiaTTSService

from eva.utils.logging import get_logger

try:
import riva.client as riva_client # type: ignore[import-untyped]
from pipecat.services.nvidia.stt import NvidiaSTTService
from pipecat.services.nvidia.tts import NvidiaTTSService

_NVIDIA_AVAILABLE = True
except ImportError:
riva_client = None # type: ignore[assignment]
_NVIDIA_AVAILABLE = False
NvidiaSTTService = object # type: ignore[misc]
NvidiaTTSService = object # type: ignore[misc]


def _check_nvidia_available():
if not _NVIDIA_AVAILABLE:
raise ImportError(
"nvidia-riva-client is required for Baseten services. Install it with: pip install nvidia-riva-client"
)


logger = get_logger(__name__)


Expand All @@ -26,6 +45,7 @@ class BasetenSTTService(NvidiaSTTService):
"""NvidiaSTTService that authenticates against a Baseten-hosted Riva deployment."""

def __init__(self, *, api_key: str, base_url: str, **kwargs):
_check_nvidia_available()
# Extract "model-{id}" from "model-{id}.grpc.api.baseten.co:443"
model_id_header = base_url.split(".")[0]
super().__init__(
Expand All @@ -42,8 +62,8 @@ def _initialize_client(self):
("baseten-authorization", f"Api-Key {self._api_key}"),
("baseten-model-id", self._function_id),
]
self._auth = riva.client.Auth(None, self._use_ssl, self._server, metadata)
self._asr_service = riva.client.ASRService(self._auth)
self._auth = riva_client.Auth(None, self._use_ssl, self._server, metadata)
self._asr_service = riva_client.ASRService(self._auth)

def _cleanup(self):
_close_grpc_channel(self._auth, "STT")
Expand All @@ -65,6 +85,7 @@ class BasetenTTSService(NvidiaTTSService):
"""NvidiaTTSService that authenticates against a Baseten-hosted Riva deployment."""

def __init__(self, *, api_key: str, base_url: str, **kwargs):
_check_nvidia_available()
# Extract "model-{id}" from "model-{id}.grpc.api.baseten.co:443"
model_id_header = base_url.split(".")[0]
super().__init__(
Expand All @@ -83,8 +104,8 @@ def _initialize_client(self):
("baseten-authorization", f"Api-Key {self._api_key}"),
("baseten-model-id", self._function_id),
]
self._auth = riva.client.Auth(None, self._use_ssl, self._server, metadata)
self._service = riva.client.SpeechSynthesisService(self._auth)
self._auth = riva_client.Auth(None, self._use_ssl, self._server, metadata)
self._service = riva_client.SpeechSynthesisService(self._auth)

def _cleanup(self):
_close_grpc_channel(self._auth, "TTS")
Expand Down
3 changes: 2 additions & 1 deletion src/eva/assistant/pipeline/observers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pipecat.observers.base_observer import BaseObserver, FramePushed
from pipecat.observers.turn_tracking_observer import TurnTrackingObserver
from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService
from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
from pipecat.services.llm_service import LLMService
from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
from pipecat.services.stt_service import STTService
Expand All @@ -32,7 +33,7 @@
logger = get_logger(__name__)


_TRANSCRIPTION_SERVICES = (STTService, AzureRealtimeLLMService, OpenAIRealtimeLLMService)
_TRANSCRIPTION_SERVICES = (STTService, AzureRealtimeLLMService, OpenAIRealtimeLLMService, GeminiLiveLLMService)


class WallClock(SystemClock):
Expand Down
31 changes: 26 additions & 5 deletions src/eva/assistant/pipeline/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import datetime
from typing import Any, AsyncGenerator, Optional

from deepgram import LiveOptions
from openai import AsyncAzureOpenAI, BadRequestError
from pipecat.frames.frames import (
ErrorFrame,
Expand Down Expand Up @@ -51,11 +50,14 @@

# Conditional Gemini imports - may fail if google-genai package version is incompatible
try:
from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService, GeminiVADParams
from pipecat.services.google.tts import GeminiTTSService

GEMINI_AVAILABLE = True
except ImportError:
# Gemini services unavailable - will fail at runtime if requested
GeminiLiveLLMService = None
GeminiVADParams = None
GeminiTTSService = None
GEMINI_AVAILABLE = False
from pipecat.adapters.schemas.function_schema import FunctionSchema
Expand Down Expand Up @@ -146,11 +148,9 @@ def create_stt_service(
logger.info(f"Using Deepgram STT: {params['model']}")
return DeepgramSTTService(
api_key=api_key,
live_options=LiveOptions(
settings=DeepgramSTTService.Settings(
language=language_code,
model=params["model"],
encoding="linear16",
sample_rate=SAMPLE_RATE,
interim_results=True,
),
sample_rate=SAMPLE_RATE,
Expand Down Expand Up @@ -465,8 +465,29 @@ def create_realtime_llm_service(
one_shot_selected_tools=pipecat_tools,
)

elif model_lower == "gemini-live":
if not GEMINI_AVAILABLE:
raise ValueError(
"Gemini Live requested but Gemini services are unavailable. "
"Check google-genai package installation and version compatibility."
)

gemini_model = params.get("model")
logger.info(f"Using Gemini Live LLM: {gemini_model}")

return GeminiLiveLLMService(
api_key=params["api_key"],
tools=pipecat_tools,
settings=GeminiLiveLLMService.Settings(
model=gemini_model,
system_instruction=system_prompt,
voice=params.get("voice", "Puck"), # Aoede, Charon, Fenrir, Kore, Puck
vad=GeminiVADParams(disabled=params.get("vad_disabled", True)),
),
)

else:
raise ValueError(f"Unknown realtime model: {model}. Available: gpt-realtime, ultravox")
raise ValueError(f"Unknown realtime model: {model}. Available: gpt-realtime, ultravox, gemini-live")


def get_openai_session_properties(system_prompt: str, params: dict, pipecat_tools) -> SessionProperties:
Expand Down
12 changes: 9 additions & 3 deletions src/eva/assistant/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies, UserTurnStrategies
from pipecat.utils.time import time_now_iso8601

from eva.assistant.agentic.audit_log import AuditLog, current_timestamp_ms
from eva.assistant.agentic.audit_log import AuditLog, convert_to_epoch_ms, current_timestamp_ms
from eva.assistant.pipeline.agent_processor import BenchmarkAgentProcessor, UserAudioCollector, UserObserver
from eva.assistant.pipeline.audio_llm_processor import (
AudioLLMProcessor,
Expand Down Expand Up @@ -734,6 +734,9 @@ async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMes
timestamp_ms=self._user_turn_started_wall_ms,
)
self._user_turn_started_wall_ms = None
await self._save_transcript_message_from_turn(
role="user", content=message.content, timestamp=self._user_turn_started_wall_ms
)

@user_aggregator.event_handler("on_user_turn_started")
async def on_user_turn_started(aggregator, strategy):
Expand All @@ -754,9 +757,12 @@ async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMes
# Prefer content from the aggregator (populated when output_modalities includes
# "text").
content = message.content
self.audit_log.append_assistant_output(content or "[audio response - transcription unavailable]")
self.audit_log.append_assistant_output(
content or "[audio response - transcription unavailable]",
timestamp_ms=convert_to_epoch_ms(message.timestamp),
)
await self._save_transcript_message_from_turn(
role="assistant", content=content, timestamp=message.timestamp
role="assistant", content=content, timestamp=convert_to_epoch_ms(message.timestamp)
)

async def _save_transcript_message_from_turn(self, role: str, content: str, timestamp: str) -> None:
Expand Down
12 changes: 12 additions & 0 deletions tests/unit/assistant/test_audit_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ConversationMessage,
LLMCall,
MessageRole,
convert_to_epoch_ms,
current_timestamp_ms,
)

Expand All @@ -23,6 +24,17 @@ def test_returns_millisecond_epoch(self):
assert ts > 1_000_000_000_000


class TestConvertToEpochMs:
    """Unit tests for convert_to_epoch_ms."""

    def test_returns_string(self):
        # An ISO 8601 UTC timestamp converts to its epoch-ms string form.
        converted = convert_to_epoch_ms("2024-01-01T00:00:00Z")
        assert isinstance(converted, str)
        assert converted == "1704067200000"

    def test_returns_millisecond_epoch(self):
        # An all-digit epoch-ms string passes through unchanged.
        assert convert_to_epoch_ms("1774987946485") == "1774987946485"


class TestConversationMessage:
def test_to_dict_excludes_none_fields(self):
msg = ConversationMessage(role=MessageRole.USER, content="hello")
Expand Down
Loading