diff --git a/.env.example b/.env.example index 061dd906..7585eac7 100644 --- a/.env.example +++ b/.env.example @@ -213,5 +213,5 @@ EVA_DEBUG=false # Example: EVA_RECORD_IDS=1.2.1,1.2.2,1.3.1 EVA_RECORD_IDS= -# Logging level (DEBUG | INFO | WARNING | ERROR) +# Logging level (DEBUG | INFO | WARNING | ERROR | CRITICAL) EVA_LOG_LEVEL=INFO diff --git a/README.md b/README.md index e1049666..0ba0d575 100644 --- a/README.md +++ b/README.md @@ -48,8 +48,8 @@ cp .env.example .env > [!TIP] > After installation, you can run EVA using either: -> - `eva` — CLI entry point (e.g., `eva --domain airline`) -> - `python main.py` — script at the repo root (e.g., `python main.py --domain airline`) +> - `eva` — CLI entry point (e.g., `eva --help`) +> - `python main.py` — script at the repo root (e.g., `python main.py --help`) > > If using an IDE, point your Python interpreter to `.venv/bin/python` so commands run in the virtual environment automatically. Otherwise, prefix commands with `uv run` or activate the environment with `source .venv/bin/activate`. diff --git a/src/eva/cli.py b/src/eva/cli.py index b2afe391..94c1ca95 100644 --- a/src/eva/cli.py +++ b/src/eva/cli.py @@ -7,6 +7,8 @@ import asyncio import sys +from pydantic import ValidationError + def main(): """Entry point for the `eva` console script.""" @@ -14,7 +16,11 @@ def main(): # Heavy deps (pipecat, litellm, etc.) are imported only in run_benchmark. from eva.models.config import RunConfig - config = RunConfig(_cli_parse_args=True, _env_file=".env") + try: + config = RunConfig(_cli_parse_args=True, _env_file=".env") + except ValidationError as e: + print(e, file=sys.stderr) + sys.exit(1) from eva.run_benchmark import run_benchmark diff --git a/src/eva/models/config.py b/src/eva/models/config.py index cd8fe819..474d29a8 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -56,8 +56,8 @@ class PipelineConfig(BaseModel): description="LLM model name matching a model_name in --model-list/EVA_MODEL_LIST", examples=["gpt-5.2", "gemini-3-pro"], ) - stt: str | None = Field(None, description="STT model", examples=["deepgram", "openai_whisper"]) - tts: str | None = Field(None, description="TTS model", examples=["cartesia", "elevenlabs"]) + stt: str = Field(description="STT model", examples=["deepgram", "openai_whisper"]) + tts: str = Field(description="TTS model", examples=["cartesia", "elevenlabs"]) stt_params: dict[str, Any] = Field({}, description="Additional STT model parameters (JSON)") tts_params: dict[str, Any] = Field({}, description="Additional TTS model parameters (JSON)") @@ -115,7 +115,7 @@ class AudioLLMConfig(BaseModel): {}, description="Audio-LLM parameters (JSON): base_url (required), api_key, model, temperature, max_tokens", ) - tts: str | None = Field(None, description="TTS model", examples=["cartesia", "elevenlabs"]) + tts: str = Field(description="TTS model", examples=["cartesia", "elevenlabs"]) tts_params: dict[str, Any] = Field({}, description="Additional TTS model parameters (JSON)") @@ -288,7 +288,7 @@ class ModelDeployment(DeploymentTypedDict): ) # Data paths - domain: str = "airline" + domain: Literal["airline"] = "airline" # Rerun settings max_rerun_attempts: int = Field(3, ge=0, le=20, description="Maximum number of rerun attempts for failed records") @@ -442,15 +442,9 @@ def _warn_deprecated_aliases(cls, data: Any) -> Any: def _check_companion_services(self) -> "RunConfig": """Ensure required companion services are set for each pipeline mode.""" if isinstance(self.model, PipelineConfig): - if not self.model.stt: - raise ValueError("EVA_MODEL__STT is required when using EVA_MODEL__LLM (ASR-LLM-TTS pipeline).") - if not self.model.tts: - raise ValueError("EVA_MODEL__TTS is required when using EVA_MODEL__LLM (ASR-LLM-TTS pipeline).") self._validate_service_params("STT", self.model.stt, self.model.stt_params) self._validate_service_params("TTS", self.model.tts, self.model.tts_params) elif isinstance(self.model, AudioLLMConfig): - if not self.model.tts: - raise ValueError("EVA_MODEL__TTS is required when using EVA_MODEL__AUDIO_LLM (SpeechLM-TTS pipeline).") self._validate_service_params("TTS", self.model.tts, self.model.tts_params) return self diff --git a/tests/unit/models/test_config_models.py b/tests/unit/models/test_config_models.py index 9b77854c..8248c39f 100644 --- a/tests/unit/models/test_config_models.py +++ b/tests/unit/models/test_config_models.py @@ -204,60 +204,61 @@ def test_invalid_model_list(self, environ, expected_exception, expected_message) _config(env_vars=environ) @pytest.mark.parametrize( - "environ, expected_exception, expected_message", + "environ, expected_message", ( ( {}, - ValidationError, r"model\s+Field required", ), ( {"EVA_MODEL": "{}"}, - ValidationError, # Discriminator defaults to PipelineConfig when no unique field present r"model\.pipeline\.llm\s+Field required", ), + ( + {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b"}, + "Multiple pipeline modes set", + ), + ( + {"EVA_MODEL__LLM": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"}, + "Multiple pipeline modes set", + ), + ( + {"EVA_MODEL__S2S": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"}, + "Multiple pipeline modes set", + ), + ( + {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b", "EVA_MODEL__AUDIO_LLM": "ultravox"}, + "Multiple pipeline modes set", + ), + ( + {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__TTS": "cartesia"}, + r"model\.pipeline\.stt\s+Field required", + ), + ( + {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__STT": "deepgram"}, + r"model\.pipeline\.tts\s+Field required", + ), + ( + {"EVA_MODEL__AUDIO_LLM": "ultravox"}, + r"model\.audio_llm\.tts\s+Field required", + ), + ), + ids=( + "Missing", + "Empty", + "Mixed LLM + S2S", + "Mixed LLM + Audio LLM", + "Mixed S2S + Audio LLM", + "Mixed all three", + "LLM without STT", + "LLM without TTS", + "Audio LLM without TTS", ), ) - def test_model_missing_or_empty(self, environ, expected_exception, expected_message): - environ |= _EVA_MODEL_LIST_ENV - with pytest.raises(expected_exception, match=expected_message): - _config(env_vars=environ) - - def test_mixed_mode_fields_raises_error(self): - """Multiple pipeline mode indicators cause a clear error.""" - # llm + s2s - with pytest.raises(ValueError, match="Multiple pipeline modes set"): - _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b"}) - - # llm + audio_llm - with pytest.raises(ValueError, match="Multiple pipeline modes set"): - _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"}) - - # s2s + audio_llm - with pytest.raises(ValueError, match="Multiple pipeline modes set"): - _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__S2S": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"}) - - # all three - with pytest.raises(ValueError, match="Multiple pipeline modes set"): - _config( - env_vars=_EVA_MODEL_LIST_ENV - | {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b", "EVA_MODEL__AUDIO_LLM": "ultravox"} - ) - - def test_missing_companion_services(self): - """Required companion services cause a clear error when missing.""" - # LLM without STT - with pytest.raises(ValueError, match="EVA_MODEL__STT is required"): - _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__TTS": "cartesia"}) - - # LLM without TTS - with pytest.raises(ValueError, match="EVA_MODEL__TTS is required"): - _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__STT": "deepgram"}) - - # Audio-LLM without TTS - with pytest.raises(ValueError, match="EVA_MODEL__TTS is required"): - _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__AUDIO_LLM": "ultravox"}) + def test_invalid_model_pipeline(self, environ, expected_message): + with pytest.raises(ValidationError, match=expected_message): + _config(env_vars=_EVA_MODEL_LIST_ENV | environ) def test_missing_stt_tts_params(self): """Missing api_key or model in STT/TTS params causes a clear error.""" @@ -583,11 +584,6 @@ def test_realtime_model(self): config = _config(env_vars=_EVA_MODEL_LIST_ENV, cli_args=["--realtime-model", "test-model"]) assert config.model.s2s == "test-model" - def test_domain_cli(self): - """--domain sets derived paths.""" - c = _config(env_vars=_BASE_ENV, cli_args=["--domain", "my_domain"]) - assert c.agent_config_path == Path("configs/agents/my_domain_agent.yaml") - def test_run_id(self): c = _config(env_vars=_BASE_ENV, cli_args=["--run-id", "my-run"]) assert c.run_id == "my-run"