Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,15 @@ license = "MIT"
dependencies = [
"pydantic>=2.0",
"pydantic-settings>=2.0",
"pipecat-ai==0.0.104",
"pipecat-ai>=0.0.108",
"elevenlabs>=1.0.0",
"openai>=1.0.0",
"anthropic>=0.83.0",
"litellm>=1.30.0",
"deepgram-sdk>=3.5.0,<4.0.0",
"deepgram-sdk>=6.1.1,<7",
"onnxruntime>=1.16.0",
"aioboto3>=12.0.0",
"google-generativeai>=0.3.0",
"google-genai>=0.3.0",
"google-genai>=1.69.0",
"google-cloud-speech>=2.0.0",
"google-cloud-texttospeech>=2.0.0",
"azure-cognitiveservices-speech>=1.31.0",
Expand All @@ -54,7 +53,6 @@ dependencies = [
"jaconv>=0.3.0",
"regex>=2023.0.0",
"more-itertools>=10.0.0",
"nvidia-riva-client>=2.25.0,<2.25.1"
]

[project.optional-dependencies]
Expand All @@ -73,6 +71,7 @@ apps = [
"streamlit-diff-viewer>=0.0.2",
]


[project.scripts]
eva = "eva.cli:main"

Expand Down
4 changes: 2 additions & 2 deletions src/eva/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
End-to-end evaluation framework for voice assistants using Pipecat and ElevenLabs.
"""

__version__ = "0.1.0"
__version__ = "0.1.2"

# Bump simulation_version when changes affect benchmark outputs (agent code,
# user simulator, orchestrator, simulation prompts, agent configs, tool mocks).
simulation_version = "0.1.0"
simulation_version = "0.1.2"

# Bump metrics_version when changes affect metric computation (metrics code,
# judge prompts, pricing tables, postprocessor).
Expand Down
14 changes: 14 additions & 0 deletions src/eva/assistant/agentic/audit_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import json
import time
from datetime import datetime
from enum import StrEnum
from pathlib import Path
from typing import Any, Optional
Expand All @@ -18,6 +19,19 @@ def current_timestamp_ms() -> str:
return str(int(round(time.time() * 1000)))


def convert_to_epoch_ms(timestamp: str) -> str:
"""Convert a timestamp to epoch milliseconds string.

If the timestamp is already in epoch milliseconds format (all digits),
return it as-is. Otherwise, parse as ISO 8601 and convert.
"""
if timestamp.isdigit():
return timestamp

dt = datetime.fromisoformat(timestamp)
return str(int(dt.timestamp() * 1000))


class MessageRole(StrEnum):
"""Message roles in a conversation."""

Expand Down
35 changes: 28 additions & 7 deletions src/eva/assistant/pipeline/nvidia_baseten.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
import riva.client
from __future__ import annotations

from pipecat.frames.frames import CancelFrame, EndFrame
from pipecat.services.nvidia.stt import NvidiaSTTService
from pipecat.services.nvidia.tts import NvidiaTTSService

from eva.utils.logging import get_logger

try:
import riva.client as riva_client # type: ignore[import-untyped]
from pipecat.services.nvidia.stt import NvidiaSTTService
from pipecat.services.nvidia.tts import NvidiaTTSService

_NVIDIA_AVAILABLE = True
except ImportError:
riva_client = None # type: ignore[assignment]
_NVIDIA_AVAILABLE = False
NvidiaSTTService = object # type: ignore[misc]
NvidiaTTSService = object # type: ignore[misc]


def _check_nvidia_available():
if not _NVIDIA_AVAILABLE:
raise ImportError(
"nvidia-riva-client is required for Baseten services. Install it with: pip install nvidia-riva-client"
)


logger = get_logger(__name__)


Expand All @@ -26,6 +45,7 @@ class BasetenSTTService(NvidiaSTTService):
"""NvidiaSTTService that authenticates against a Baseten-hosted Riva deployment."""

def __init__(self, *, api_key: str, base_url: str, **kwargs):
_check_nvidia_available()
# Extract "model-{id}" from "model-{id}.grpc.api.baseten.co:443"
model_id_header = base_url.split(".")[0]
super().__init__(
Expand All @@ -42,8 +62,8 @@ def _initialize_client(self):
("baseten-authorization", f"Api-Key {self._api_key}"),
("baseten-model-id", self._function_id),
]
self._auth = riva.client.Auth(None, self._use_ssl, self._server, metadata)
self._asr_service = riva.client.ASRService(self._auth)
self._auth = riva_client.Auth(None, self._use_ssl, self._server, metadata)
self._asr_service = riva_client.ASRService(self._auth)

def _cleanup(self):
_close_grpc_channel(self._auth, "STT")
Expand All @@ -65,6 +85,7 @@ class BasetenTTSService(NvidiaTTSService):
"""NvidiaTTSService that authenticates against a Baseten-hosted Riva deployment."""

def __init__(self, *, api_key: str, base_url: str, **kwargs):
_check_nvidia_available()
# Extract "model-{id}" from "model-{id}.grpc.api.baseten.co:443"
model_id_header = base_url.split(".")[0]
super().__init__(
Expand All @@ -83,8 +104,8 @@ def _initialize_client(self):
("baseten-authorization", f"Api-Key {self._api_key}"),
("baseten-model-id", self._function_id),
]
self._auth = riva.client.Auth(None, self._use_ssl, self._server, metadata)
self._service = riva.client.SpeechSynthesisService(self._auth)
self._auth = riva_client.Auth(None, self._use_ssl, self._server, metadata)
self._service = riva_client.SpeechSynthesisService(self._auth)

def _cleanup(self):
_close_grpc_channel(self._auth, "TTS")
Expand Down
3 changes: 2 additions & 1 deletion src/eva/assistant/pipeline/observers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pipecat.observers.base_observer import BaseObserver, FramePushed
from pipecat.observers.turn_tracking_observer import TurnTrackingObserver
from pipecat.services.azure.realtime.llm import AzureRealtimeLLMService
from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService
from pipecat.services.llm_service import LLMService
from pipecat.services.openai.realtime.llm import OpenAIRealtimeLLMService
from pipecat.services.stt_service import STTService
Expand All @@ -32,7 +33,7 @@
logger = get_logger(__name__)


_TRANSCRIPTION_SERVICES = (STTService, AzureRealtimeLLMService, OpenAIRealtimeLLMService)
_TRANSCRIPTION_SERVICES = (STTService, AzureRealtimeLLMService, OpenAIRealtimeLLMService, GeminiLiveLLMService)


class WallClock(SystemClock):
Expand Down
31 changes: 26 additions & 5 deletions src/eva/assistant/pipeline/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
import datetime
from typing import Any, AsyncGenerator, Optional

from deepgram import LiveOptions
from openai import AsyncAzureOpenAI, BadRequestError
from pipecat.frames.frames import (
ErrorFrame,
Expand Down Expand Up @@ -51,11 +50,14 @@

# Conditional Gemini imports - may fail if google-genai package version is incompatible
try:
from pipecat.services.google.gemini_live.llm import GeminiLiveLLMService, GeminiVADParams
from pipecat.services.google.tts import GeminiTTSService

GEMINI_AVAILABLE = True
except ImportError:
# Gemini services unavailable - will fail at runtime if requested
GeminiLiveLLMService = None
GeminiVADParams = None
GeminiTTSService = None
GEMINI_AVAILABLE = False
from pipecat.adapters.schemas.function_schema import FunctionSchema
Expand Down Expand Up @@ -146,11 +148,9 @@ def create_stt_service(
logger.info(f"Using Deepgram STT: {params['model']}")
return DeepgramSTTService(
api_key=api_key,
live_options=LiveOptions(
settings=DeepgramSTTService.Settings(
language=language_code,
model=params["model"],
encoding="linear16",
sample_rate=SAMPLE_RATE,
interim_results=True,
),
sample_rate=SAMPLE_RATE,
Expand Down Expand Up @@ -465,8 +465,29 @@ def create_realtime_llm_service(
one_shot_selected_tools=pipecat_tools,
)

elif model_lower == "gemini-live":
if not GEMINI_AVAILABLE:
raise ValueError(
"Gemini Live requested but Gemini services are unavailable. "
"Check google-genai package installation and version compatibility."
)

gemini_model = params.get("model")
logger.info(f"Using Gemini Live LLM: {gemini_model}")

return GeminiLiveLLMService(
api_key=params["api_key"],
tools=pipecat_tools,
settings=GeminiLiveLLMService.Settings(
model=gemini_model,
system_instruction=system_prompt,
voice=params.get("voice", "Puck"), # Aoede, Charon, Fenrir, Kore, Puck
vad=GeminiVADParams(disabled=params.get("vad_disabled", True)),
),
)

else:
raise ValueError(f"Unknown realtime model: {model}. Available: gpt-realtime, ultravox")
raise ValueError(f"Unknown realtime model: {model}. Available: gpt-realtime, ultravox, gemini-live")


def get_openai_session_properties(system_prompt: str, params: dict, pipecat_tools) -> SessionProperties:
Expand Down
12 changes: 9 additions & 3 deletions src/eva/assistant/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from pipecat.turns.user_turn_strategies import ExternalUserTurnStrategies, UserTurnStrategies
from pipecat.utils.time import time_now_iso8601

from eva.assistant.agentic.audit_log import AuditLog, current_timestamp_ms
from eva.assistant.agentic.audit_log import AuditLog, convert_to_epoch_ms, current_timestamp_ms
from eva.assistant.pipeline.agent_processor import BenchmarkAgentProcessor, UserAudioCollector, UserObserver
from eva.assistant.pipeline.audio_llm_processor import (
AudioLLMProcessor,
Expand Down Expand Up @@ -734,6 +734,9 @@ async def on_user_turn_stopped(aggregator, strategy, message: UserTurnStoppedMes
timestamp_ms=self._user_turn_started_wall_ms,
)
self._user_turn_started_wall_ms = None
await self._save_transcript_message_from_turn(
role="user", content=message.content, timestamp=self._user_turn_started_wall_ms
)

@user_aggregator.event_handler("on_user_turn_started")
async def on_user_turn_started(aggregator, strategy):
Expand All @@ -754,9 +757,12 @@ async def on_assistant_turn_stopped(aggregator, message: AssistantTurnStoppedMes
# Prefer content from the aggregator (populated when output_modalities includes
# "text").
content = message.content
self.audit_log.append_assistant_output(content or "[audio response - transcription unavailable]")
self.audit_log.append_assistant_output(
content or "[audio response - transcription unavailable]",
timestamp_ms=convert_to_epoch_ms(message.timestamp),
)
await self._save_transcript_message_from_turn(
role="assistant", content=content, timestamp=message.timestamp
role="assistant", content=content, timestamp=convert_to_epoch_ms(message.timestamp)
)

async def _save_transcript_message_from_turn(self, role: str, content: str, timestamp: str) -> None:
Expand Down
12 changes: 12 additions & 0 deletions tests/unit/assistant/test_audit_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
ConversationMessage,
LLMCall,
MessageRole,
convert_to_epoch_ms,
current_timestamp_ms,
)

Expand All @@ -23,6 +24,17 @@ def test_returns_millisecond_epoch(self):
assert ts > 1_000_000_000_000


class TestConvertToEpochMs:
    """Unit tests for convert_to_epoch_ms."""

    def test_returns_string(self):
        # An ISO 8601 UTC timestamp converts to its epoch-ms string form.
        converted = convert_to_epoch_ms("2024-01-01T00:00:00Z")
        assert isinstance(converted, str)
        assert converted == "1704067200000"

    def test_returns_millisecond_epoch(self):
        # An all-digit epoch-ms string passes through unchanged.
        assert convert_to_epoch_ms("1774987946485") == "1774987946485"


class TestConversationMessage:
def test_to_dict_excludes_none_fields(self):
msg = ConversationMessage(role=MessageRole.USER, content="hello")
Expand Down
Loading