Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -213,5 +213,5 @@ EVA_DEBUG=false
# Example: EVA_RECORD_IDS=1.2.1,1.2.2,1.3.1
EVA_RECORD_IDS=

# Logging level (DEBUG | INFO | WARNING | ERROR)
# Logging level (DEBUG | INFO | WARNING | ERROR | CRITICAL)
EVA_LOG_LEVEL=INFO
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ cp .env.example .env

> [!TIP]
> After installation, you can run EVA using either:
> - `eva` — CLI entry point (e.g., `eva --domain airline`)
> - `python main.py` — script at the repo root (e.g., `python main.py --domain airline`)
> - `eva` — CLI entry point (e.g., `eva --help`)
> - `python main.py` — script at the repo root (e.g., `python main.py --help`)
>
> If using an IDE, point your Python interpreter to `.venv/bin/python` so commands run in the virtual environment automatically. Otherwise, prefix commands with `uv run` or activate the environment with `source .venv/bin/activate`.

Expand Down
8 changes: 7 additions & 1 deletion src/eva/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,20 @@
import asyncio
import sys

from pydantic import ValidationError


def main():
"""Entry point for the `eva` console script."""
# Import config first (lightweight) for fast --help and validation errors.
# Heavy deps (pipecat, litellm, etc.) are imported only in run_benchmark.
from eva.models.config import RunConfig

config = RunConfig(_cli_parse_args=True, _env_file=".env")
try:
config = RunConfig(_cli_parse_args=True, _env_file=".env")
except ValidationError as e:
print(e, file=sys.stderr)
sys.exit(1)

from eva.run_benchmark import run_benchmark

Expand Down
14 changes: 4 additions & 10 deletions src/eva/models/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ class PipelineConfig(BaseModel):
description="LLM model name matching a model_name in --model-list/EVA_MODEL_LIST",
examples=["gpt-5.2", "gemini-3-pro"],
)
stt: str | None = Field(None, description="STT model", examples=["deepgram", "openai_whisper"])
tts: str | None = Field(None, description="TTS model", examples=["cartesia", "elevenlabs"])
stt: str = Field(description="STT model", examples=["deepgram", "openai_whisper"])
tts: str = Field(description="TTS model", examples=["cartesia", "elevenlabs"])

stt_params: dict[str, Any] = Field({}, description="Additional STT model parameters (JSON)")
tts_params: dict[str, Any] = Field({}, description="Additional TTS model parameters (JSON)")
Expand Down Expand Up @@ -115,7 +115,7 @@ class AudioLLMConfig(BaseModel):
{},
description="Audio-LLM parameters (JSON): base_url (required), api_key, model, temperature, max_tokens",
)
tts: str | None = Field(None, description="TTS model", examples=["cartesia", "elevenlabs"])
tts: str = Field(description="TTS model", examples=["cartesia", "elevenlabs"])
tts_params: dict[str, Any] = Field({}, description="Additional TTS model parameters (JSON)")


Expand Down Expand Up @@ -288,7 +288,7 @@ class ModelDeployment(DeploymentTypedDict):
)

# Data paths
domain: str = "airline"
domain: Literal["airline"] = "airline"

# Rerun settings
max_rerun_attempts: int = Field(3, ge=0, le=20, description="Maximum number of rerun attempts for failed records")
Expand Down Expand Up @@ -442,15 +442,9 @@ def _warn_deprecated_aliases(cls, data: Any) -> Any:
def _check_companion_services(self) -> "RunConfig":
"""Ensure required companion services are set for each pipeline mode."""
if isinstance(self.model, PipelineConfig):
if not self.model.stt:
raise ValueError("EVA_MODEL__STT is required when using EVA_MODEL__LLM (ASR-LLM-TTS pipeline).")
if not self.model.tts:
raise ValueError("EVA_MODEL__TTS is required when using EVA_MODEL__LLM (ASR-LLM-TTS pipeline).")
self._validate_service_params("STT", self.model.stt, self.model.stt_params)
self._validate_service_params("TTS", self.model.tts, self.model.tts_params)
elif isinstance(self.model, AudioLLMConfig):
if not self.model.tts:
raise ValueError("EVA_MODEL__TTS is required when using EVA_MODEL__AUDIO_LLM (SpeechLM-TTS pipeline).")
self._validate_service_params("TTS", self.model.tts, self.model.tts_params)
return self

Expand Down
90 changes: 43 additions & 47 deletions tests/unit/models/test_config_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,60 +204,61 @@ def test_invalid_model_list(self, environ, expected_exception, expected_message)
_config(env_vars=environ)

@pytest.mark.parametrize(
"environ, expected_exception, expected_message",
"environ, expected_message",
(
(
{},
ValidationError,
r"model\s+Field required",
),
(
{"EVA_MODEL": "{}"},
ValidationError,
# Discriminator defaults to PipelineConfig when no unique field present
r"model\.pipeline\.llm\s+Field required",
),
(
{"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b"},
"Multiple pipeline modes set",
),
(
{"EVA_MODEL__LLM": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"},
"Multiple pipeline modes set",
),
(
{"EVA_MODEL__S2S": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"},
"Multiple pipeline modes set",
),
(
{"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b", "EVA_MODEL__AUDIO_LLM": "ultravox"},
"Multiple pipeline modes set",
),
(
{"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__TTS": "cartesia"},
r"model\.pipeline\.stt\s+Field required",
),
(
{"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__STT": "deepgram"},
r"model\.pipeline\.tts\s+Field required",
),
(
{"EVA_MODEL__AUDIO_LLM": "ultravox"},
r"model\.audio_llm\.tts\s+Field required",
),
),
ids=(
"Missing",
"Empty",
"Mixed LLM + S2S",
"Mixed LLM + Audio LLM",
"Mixed S2S + Audio LLM",
"Mixed all three",
"LLM without STT",
"LLM without TTS",
"Audio LLM without TTS",
),
)
def test_model_missing_or_empty(self, environ, expected_exception, expected_message):
environ |= _EVA_MODEL_LIST_ENV
with pytest.raises(expected_exception, match=expected_message):
_config(env_vars=environ)

def test_mixed_mode_fields_raises_error(self):
"""Multiple pipeline mode indicators cause a clear error."""
# llm + s2s
with pytest.raises(ValueError, match="Multiple pipeline modes set"):
_config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b"})

# llm + audio_llm
with pytest.raises(ValueError, match="Multiple pipeline modes set"):
_config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"})

# s2s + audio_llm
with pytest.raises(ValueError, match="Multiple pipeline modes set"):
_config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__S2S": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"})

# all three
with pytest.raises(ValueError, match="Multiple pipeline modes set"):
_config(
env_vars=_EVA_MODEL_LIST_ENV
| {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b", "EVA_MODEL__AUDIO_LLM": "ultravox"}
)

def test_missing_companion_services(self):
"""Required companion services cause a clear error when missing."""
# LLM without STT
with pytest.raises(ValueError, match="EVA_MODEL__STT is required"):
_config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__TTS": "cartesia"})

# LLM without TTS
with pytest.raises(ValueError, match="EVA_MODEL__TTS is required"):
_config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__STT": "deepgram"})

# Audio-LLM without TTS
with pytest.raises(ValueError, match="EVA_MODEL__TTS is required"):
_config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__AUDIO_LLM": "ultravox"})
def test_invalid_model_pipeline(self, environ, expected_message):
with pytest.raises(ValidationError, match=expected_message):
_config(env_vars=_EVA_MODEL_LIST_ENV | environ)

def test_missing_stt_tts_params(self):
"""Missing api_key or model in STT/TTS params causes a clear error."""
Expand Down Expand Up @@ -583,11 +584,6 @@ def test_realtime_model(self):
config = _config(env_vars=_EVA_MODEL_LIST_ENV, cli_args=["--realtime-model", "test-model"])
assert config.model.s2s == "test-model"

def test_domain_cli(self):
"""--domain sets derived paths."""
c = _config(env_vars=_BASE_ENV, cli_args=["--domain", "my_domain"])
assert c.agent_config_path == Path("configs/agents/my_domain_agent.yaml")

def test_run_id(self):
c = _config(env_vars=_BASE_ENV, cli_args=["--run-id", "my-run"])
assert c.run_id == "my-run"
Expand Down
Loading