ServiceNow · JosephMarinier · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026 · Mar 31, 2026
diff --git a/.env.example b/.env.example
@@ -213,5 +213,5 @@ EVA_DEBUG=false
 # Example: EVA_RECORD_IDS=1.2.1,1.2.2,1.3.1
 EVA_RECORD_IDS=
 
-# Logging level (DEBUG | INFO | WARNING | ERROR)
+# Logging level (DEBUG | INFO | WARNING | ERROR | CRITICAL)
 EVA_LOG_LEVEL=INFO
diff --git a/README.md b/README.md
@@ -48,8 +48,8 @@ cp .env.example .env
 
 > [!TIP]
 > After installation, you can run EVA using either:
-> - `eva` — CLI entry point (e.g., `eva --domain airline`)
-> - `python main.py` — script at the repo root (e.g., `python main.py --domain airline`)
+> - `eva` — CLI entry point (e.g., `eva --help`)
+> - `python main.py` — script at the repo root (e.g., `python main.py --help`)
 >
 > If using an IDE, point your Python interpreter to `.venv/bin/python` so commands run in the virtual environment automatically. Otherwise, prefix commands with `uv run` or activate the environment with `source .venv/bin/activate`.
 

diff --git a/src/eva/cli.py b/src/eva/cli.py
@@ -7,14 +7,20 @@
 import asyncio
 import sys
 
+from pydantic import ValidationError
+
 
 def main():
     """Entry point for the `eva` console script."""
     # Import config first (lightweight) for fast --help and validation errors.
     # Heavy deps (pipecat, litellm, etc.) are imported only in run_benchmark.
     from eva.models.config import RunConfig
 
-    config = RunConfig(_cli_parse_args=True, _env_file=".env")
+    try:
+        config = RunConfig(_cli_parse_args=True, _env_file=".env")
+    except ValidationError as e:
+        print(e, file=sys.stderr)
+        sys.exit(1)
 
     from eva.run_benchmark import run_benchmark
 

diff --git a/src/eva/models/config.py b/src/eva/models/config.py
@@ -56,8 +56,8 @@ class PipelineConfig(BaseModel):
         description="LLM model name matching a model_name in --model-list/EVA_MODEL_LIST",
         examples=["gpt-5.2", "gemini-3-pro"],
     )
-    stt: str | None = Field(None, description="STT model", examples=["deepgram", "openai_whisper"])
-    tts: str | None = Field(None, description="TTS model", examples=["cartesia", "elevenlabs"])
+    stt: str = Field(description="STT model", examples=["deepgram", "openai_whisper"])
+    tts: str = Field(description="TTS model", examples=["cartesia", "elevenlabs"])
 
     stt_params: dict[str, Any] = Field({}, description="Additional STT model parameters (JSON)")
     tts_params: dict[str, Any] = Field({}, description="Additional TTS model parameters (JSON)")
@@ -115,7 +115,7 @@ class AudioLLMConfig(BaseModel):
         {},
         description="Audio-LLM parameters (JSON): base_url (required), api_key, model, temperature, max_tokens",
     )
-    tts: str | None = Field(None, description="TTS model", examples=["cartesia", "elevenlabs"])
+    tts: str = Field(description="TTS model", examples=["cartesia", "elevenlabs"])
     tts_params: dict[str, Any] = Field({}, description="Additional TTS model parameters (JSON)")
 
 
@@ -288,7 +288,7 @@ class ModelDeployment(DeploymentTypedDict):
     )
 
     # Data paths
-    domain: str = "airline"
+    domain: Literal["airline"] = "airline"
 
     # Rerun settings
     max_rerun_attempts: int = Field(3, ge=0, le=20, description="Maximum number of rerun attempts for failed records")
@@ -442,15 +442,9 @@ def _warn_deprecated_aliases(cls, data: Any) -> Any:
     def _check_companion_services(self) -> "RunConfig":
         """Ensure required companion services are set for each pipeline mode."""
         if isinstance(self.model, PipelineConfig):
-            if not self.model.stt:
-                raise ValueError("EVA_MODEL__STT is required when using EVA_MODEL__LLM (ASR-LLM-TTS pipeline).")
-            if not self.model.tts:
-                raise ValueError("EVA_MODEL__TTS is required when using EVA_MODEL__LLM (ASR-LLM-TTS pipeline).")
             self._validate_service_params("STT", self.model.stt, self.model.stt_params)
             self._validate_service_params("TTS", self.model.tts, self.model.tts_params)
         elif isinstance(self.model, AudioLLMConfig):
-            if not self.model.tts:
-                raise ValueError("EVA_MODEL__TTS is required when using EVA_MODEL__AUDIO_LLM (SpeechLM-TTS pipeline).")
             self._validate_service_params("TTS", self.model.tts, self.model.tts_params)
         return self
 

diff --git a/tests/unit/models/test_config_models.py b/tests/unit/models/test_config_models.py
@@ -204,60 +204,61 @@ def test_invalid_model_list(self, environ, expected_exception, expected_message)
             _config(env_vars=environ)
 
     @pytest.mark.parametrize(
-        "environ, expected_exception, expected_message",
+        "environ, expected_message",
         (
             (
                 {},
-                ValidationError,
                 r"model\s+Field required",
             ),
             (
                 {"EVA_MODEL": "{}"},
-                ValidationError,
                 # Discriminator defaults to PipelineConfig when no unique field present
                 r"model\.pipeline\.llm\s+Field required",
             ),
+            (
+                {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b"},
+                "Multiple pipeline modes set",
+            ),
+            (
+                {"EVA_MODEL__LLM": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"},
+                "Multiple pipeline modes set",
+            ),
+            (
+                {"EVA_MODEL__S2S": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"},
+                "Multiple pipeline modes set",
+            ),
+            (
+                {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b", "EVA_MODEL__AUDIO_LLM": "ultravox"},
+                "Multiple pipeline modes set",
+            ),
+            (
+                {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__TTS": "cartesia"},
+                r"model\.pipeline\.stt\s+Field required",
+            ),
+            (
+                {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__STT": "deepgram"},
+                r"model\.pipeline\.tts\s+Field required",
+            ),
+            (
+                {"EVA_MODEL__AUDIO_LLM": "ultravox"},
+                r"model\.audio_llm\.tts\s+Field required",
+            ),
+        ),
+        ids=(
+            "Missing",
+            "Empty",
+            "Mixed LLM + S2S",
+            "Mixed LLM + Audio LLM",
+            "Mixed S2S + Audio LLM",
+            "Mixed all three",
+            "LLM without STT",
+            "LLM without TTS",
+            "Audio LLM without TTS",
         ),
     )
-    def test_model_missing_or_empty(self, environ, expected_exception, expected_message):
-        environ |= _EVA_MODEL_LIST_ENV
-        with pytest.raises(expected_exception, match=expected_message):
-            _config(env_vars=environ)
-
-    def test_mixed_mode_fields_raises_error(self):
-        """Multiple pipeline mode indicators cause a clear error."""
-        # llm + s2s
-        with pytest.raises(ValueError, match="Multiple pipeline modes set"):
-            _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b"})
-
-        # llm + audio_llm
-        with pytest.raises(ValueError, match="Multiple pipeline modes set"):
-            _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"})
-
-        # s2s + audio_llm
-        with pytest.raises(ValueError, match="Multiple pipeline modes set"):
-            _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__S2S": "a", "EVA_MODEL__AUDIO_LLM": "ultravox"})
-
-        # all three
-        with pytest.raises(ValueError, match="Multiple pipeline modes set"):
-            _config(
-                env_vars=_EVA_MODEL_LIST_ENV
-                | {"EVA_MODEL__LLM": "a", "EVA_MODEL__S2S": "b", "EVA_MODEL__AUDIO_LLM": "ultravox"}
-            )
-
-    def test_missing_companion_services(self):
-        """Required companion services cause a clear error when missing."""
-        # LLM without STT
-        with pytest.raises(ValueError, match="EVA_MODEL__STT is required"):
-            _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__TTS": "cartesia"})
-
-        # LLM without TTS
-        with pytest.raises(ValueError, match="EVA_MODEL__TTS is required"):
-            _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__LLM": "gpt-5.2", "EVA_MODEL__STT": "deepgram"})
-
-        # Audio-LLM without TTS
-        with pytest.raises(ValueError, match="EVA_MODEL__TTS is required"):
-            _config(env_vars=_EVA_MODEL_LIST_ENV | {"EVA_MODEL__AUDIO_LLM": "ultravox"})
+    def test_invalid_model_pipeline(self, environ, expected_message):
+        with pytest.raises(ValidationError, match=expected_message):
+            _config(env_vars=_EVA_MODEL_LIST_ENV | environ)
 
     def test_missing_stt_tts_params(self):
         """Missing api_key or model in STT/TTS params causes a clear error."""
@@ -583,11 +584,6 @@ def test_realtime_model(self):
         config = _config(env_vars=_EVA_MODEL_LIST_ENV, cli_args=["--realtime-model", "test-model"])
         assert config.model.s2s == "test-model"
 
-    def test_domain_cli(self):
-        """--domain sets derived paths."""
-        c = _config(env_vars=_BASE_ENV, cli_args=["--domain", "my_domain"])
-        assert c.agent_config_path == Path("configs/agents/my_domain_agent.yaml")
-
     def test_run_id(self):
         c = _config(env_vars=_BASE_ENV, cli_args=["--run-id", "my-run"])
         assert c.run_id == "my-run"