From a13480d0d50d6473797f85855b5cabc70676dd6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Thu, 26 Mar 2026 18:02:44 -0400 Subject: [PATCH 01/11] Add model names to timestamp --- src/eva/models/config.py | 27 +++++++++++++++++++++++++ tests/unit/models/test_config_models.py | 4 ++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index cd8fe819..a674465d 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -39,6 +39,26 @@ def current_date_and_time(): return f"{datetime.now(UTC):%Y-%m-%d_%H-%M-%S.%f}" +def _model_suffix(model: Any) -> str: + """Build a short suffix from the model config for use in folder names.""" + if isinstance(model, PipelineConfig): + parts = [ + model.stt_params.get("alias") or model.stt_params.get("model") or model.stt or "", + model.llm, + model.tts_params.get("alias") or model.tts_params.get("model") or model.tts or "", + ] + elif isinstance(model, SpeechToSpeechConfig): + parts = [model.s2s_params.get("alias") or model.s2s_params.get("model") or model.s2s] + elif isinstance(model, AudioLLMConfig): + parts = [ + model.audio_llm_params.get("alias") or model.audio_llm_params.get("model") or model.audio_llm, + model.tts_params.get("alias") or model.tts_params.get("model") or model.tts or "", + ] + else: + return "" + return "_".join(p for p in parts if p) + + class PipelineConfig(BaseModel): """Configuration for a STT + LLM + TTS pipeline.""" @@ -452,6 +472,13 @@ def _check_companion_services(self) -> "RunConfig": if not self.model.tts: raise ValueError("EVA_MODEL__TTS is required when using EVA_MODEL__AUDIO_LLM (SpeechLM-TTS pipeline).") self._validate_service_params("TTS", self.model.tts, self.model.tts_params) + + # Append model names to auto-generated run_id + if "run_id" not in self.model_fields_set: + suffix = _model_suffix(self.model) + if suffix: + self.run_id = f"{self.run_id}_{suffix}" + return self # 
Providers that manage their own model/key resolution (e.g. WebSocket-based) diff --git a/tests/unit/models/test_config_models.py b/tests/unit/models/test_config_models.py index 9b77854c..47ca4873 100644 --- a/tests/unit/models/test_config_models.py +++ b/tests/unit/models/test_config_models.py @@ -2,7 +2,6 @@ import json import os -from datetime import datetime from pathlib import Path from unittest.mock import MagicMock, patch @@ -81,7 +80,8 @@ def test_create_minimal_config(self): assert config.dataset_path == Path("data/airline_dataset.jsonl") assert config.tool_mocks_path == Path("data/airline_scenarios") - assert datetime.strptime(config.run_id, "%Y-%m-%d_%H-%M-%S.%f") + # run_id = timestamp + model suffix (e.g. "2024-01-15_14-30-45.123456_nova-2_gpt-5.2_sonic") + assert config.run_id.endswith("nova-2_gpt-5.2_sonic") assert config.max_concurrent_conversations == 1 assert config.conversation_timeout_seconds == 360 From 44e521f026b1dab47a92b4e42ffc0de65d048c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Thu, 26 Mar 2026 18:04:53 -0400 Subject: [PATCH 02/11] Make sure we are not saving api keys in config.json --- src/eva/models/config.py | 12 ++++++++++++ tests/unit/models/test_config_models.py | 8 +++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index a674465d..d9e46867 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -532,6 +532,18 @@ def _redact_model_list(cls, deployments: list[ModelDeployment]) -> list[dict]: redacted.append(deployment) return redacted + @field_serializer("model") + @classmethod + def _redact_model_params(cls, model: ModelConfigUnion) -> dict: + """Redact secret values in STT/TTS/S2S/AudioLLM params when serializing.""" + data = model.model_dump(mode="json") + for field_name, value in data.items(): + if field_name.endswith("_params") and isinstance(value, dict): + for key in value: + if "key" in key or 
"credentials" in key: + value[key] = "***" + return data + @classmethod def from_yaml(cls, path: Path | str) -> "RunConfig": """Load configuration from YAML file.""" diff --git a/tests/unit/models/test_config_models.py b/tests/unit/models/test_config_models.py index 47ca4873..3f445544 100644 --- a/tests/unit/models/test_config_models.py +++ b/tests/unit/models/test_config_models.py @@ -160,13 +160,19 @@ def test_indentation_in_model_list(self, tmp_path: Path, vars_location: str, ind assert config.model_list == MODEL_LIST def test_secrets_redacted(self): - """Secrets are redacted in model_list.""" + """Secrets are redacted in model_list and STT/TTS params.""" config = _config(env_vars=_BASE_ENV) dumped = config.model_dump(mode="json") assert dumped["model_list"][0]["litellm_params"]["api_key"] == "***" assert dumped["model_list"][1]["litellm_params"]["vertex_credentials"] == "***" assert dumped["model_list"][2]["litellm_params"]["aws_access_key_id"] == "***" assert dumped["model_list"][2]["litellm_params"]["aws_secret_access_key"] == "***" + # STT/TTS params api_key must also be redacted + assert dumped["model"]["stt_params"]["api_key"] == "***" + assert dumped["model"]["tts_params"]["api_key"] == "***" + # Non-secret fields preserved + assert dumped["model"]["stt_params"]["model"] == "nova-2" + assert dumped["model"]["tts_params"]["model"] == "sonic" @pytest.mark.parametrize( "environ, expected_exception, expected_message", From aee752d5ffafe641dc4425a9c484bb9272d6b6c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Thu, 26 Mar 2026 18:25:06 -0400 Subject: [PATCH 03/11] On rerun read the api keys from .env and not config.json --- src/eva/models/config.py | 56 +++++++++++++++++++ src/eva/run_benchmark.py | 3 ++ tests/unit/models/test_config_models.py | 71 +++++++++++++++++++++++++ 3 files changed, 130 insertions(+) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index d9e46867..6e6a4ce7 100644 --- 
a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -12,6 +12,7 @@ ``RunConfig(_env_file=".env", _cli_parse_args=True)``. """ +import logging from datetime import UTC, datetime from pathlib import Path from typing import Annotated, Any, ClassVar, Literal @@ -34,6 +35,8 @@ from eva.models.provenance import RunProvenance +logger = logging.getLogger(__name__) + def current_date_and_time(): return f"{datetime.now(UTC):%Y-%m-%d_%H-%M-%S.%f}" @@ -544,6 +547,59 @@ def _redact_model_params(cls, model: ModelConfigUnion) -> dict: value[key] = "***" return data + def restore_redacted_secrets(self, live: "RunConfig") -> None: + """Replace redacted ``***`` values in ``*_params`` dicts with real values from *live* config. + + Raises: + ValueError: If the saved and live configs use different providers or aliases + for any service that has redacted secrets. + """ + # Map each params field to its provider field (e.g. stt_params -> stt) + _PARAMS_TO_PROVIDER = { + "stt_params": "stt", + "tts_params": "tts", + "s2s_params": "s2s", + "audio_llm_params": "audio_llm", + } + for params_field, provider_field in _PARAMS_TO_PROVIDER.items(): + saved = getattr(self.model, params_field, None) + source = getattr(live.model, params_field, None) + if not isinstance(saved, dict) or not isinstance(source, dict): + continue + has_redacted = any(v == "***" for v in saved.values()) + if not has_redacted: + continue + + # Check provider matches (e.g. 
stt: "deepgram" vs "cartesia") + saved_provider = getattr(self.model, provider_field, None) + live_provider = getattr(live.model, provider_field, None) + if saved_provider != live_provider: + raise ValueError( + f"Cannot restore secrets: saved {provider_field}={saved_provider!r} " + f"but current environment has {provider_field}={live_provider!r}" + ) + + # Check alias matches (strict — aliases identify a specific configuration) + saved_alias = saved.get("alias") + live_alias = source.get("alias") + if saved_alias and live_alias and saved_alias != live_alias: + raise ValueError( + f"Cannot restore secrets: saved {params_field}[alias]={saved_alias!r} " + f"but current environment has {params_field}[alias]={live_alias!r}" + ) + + # Warn if model changed (non-fatal — models can be updated) + saved_model = saved.get("model") + live_model = source.get("model") + if saved_model and live_model and saved_model != live_model: + logger.warning( + f"Model mismatch for {params_field}: saved {saved_model!r}, current environment has {live_model!r}" + ) + + for key, value in saved.items(): + if value == "***" and key in source: + saved[key] = source[key] + @classmethod def from_yaml(cls, path: Path | str) -> "RunConfig": """Load configuration from YAML file.""" diff --git a/src/eva/run_benchmark.py b/src/eva/run_benchmark.py index 92d32b01..78a66843 100644 --- a/src/eva/run_benchmark.py +++ b/src/eva/run_benchmark.py @@ -42,6 +42,9 @@ async def run_benchmark(config: RunConfig) -> int: logger.error(str(e)) return 1 + # Restore secrets redacted in config.json with live env values + runner.config.restore_redacted_secrets(config) + # Apply CLI overrides runner.config.max_rerun_attempts = config.max_rerun_attempts runner.config.force_rerun_metrics = config.force_rerun_metrics diff --git a/tests/unit/models/test_config_models.py b/tests/unit/models/test_config_models.py index 3f445544..69e5fbd2 100644 --- a/tests/unit/models/test_config_models.py +++ 
b/tests/unit/models/test_config_models.py @@ -174,6 +174,77 @@ def test_secrets_redacted(self): assert dumped["model"]["stt_params"]["model"] == "nova-2" assert dumped["model"]["tts_params"]["model"] == "sonic" + def test_restore_redacted_secrets(self): + """Redacted secrets are restored from a live config.""" + config = _config(env_vars=_BASE_ENV) + # Simulate round-trip through config.json (redacted on dump, loaded back) + dumped_json = config.model_dump_json() + loaded = RunConfig.model_validate_json(dumped_json) + assert loaded.model.stt_params["api_key"] == "***" + assert loaded.model.tts_params["api_key"] == "***" + + # Restore from live config (which has real keys from env) + loaded.restore_redacted_secrets(config) + assert loaded.model.stt_params["api_key"] == "test_key" + assert loaded.model.tts_params["api_key"] == "test_key" + # Non-secret fields unchanged + assert loaded.model.stt_params["model"] == "nova-2" + + def test_restore_redacted_secrets_provider_mismatch(self): + """Restoring secrets fails if the STT/TTS provider changed.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded = RunConfig.model_validate_json(dumped_json) + + # Live config uses a different STT provider + live = _config( + env_vars=_BASE_ENV + | { + "EVA_MODEL__STT": "openai_whisper", + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "whisper-1"}), + } + ) + with pytest.raises(ValueError, match=r"saved stt='deepgram'.*current environment has stt='openai_whisper'"): + loaded.restore_redacted_secrets(live) + + def test_restore_redacted_secrets_model_mismatch_warns(self, caplog): + """Restoring secrets warns (but succeeds) if the STT/TTS model changed.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded = RunConfig.model_validate_json(dumped_json) + + # Same provider, different model + live = _config(env_vars=_BASE_ENV | {"EVA_MODEL__TTS_PARAMS": json.dumps({"api_key": "k", "model": 
"sonic-2"})}) + with caplog.at_level("WARNING", logger="eva.models.config"): + loaded.restore_redacted_secrets(live) + assert "sonic" in caplog.text + assert "sonic-2" in caplog.text + # Secrets still restored despite the warning + assert loaded.model.tts_params["api_key"] == "k" + + def test_restore_redacted_secrets_alias_mismatch(self): + """Restoring secrets fails if the alias changed.""" + config = _config( + env_vars=_BASE_ENV + | { + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2", "alias": "stt-v1"}), + } + ) + dumped_json = config.model_dump_json() + loaded = RunConfig.model_validate_json(dumped_json) + + live = _config( + env_vars=_BASE_ENV + | { + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2", "alias": "stt-v2"}), + } + ) + with pytest.raises( + ValueError, + match=r"saved stt_params\[alias\]='stt-v1'.*current environment has stt_params\[alias\]='stt-v2'", + ): + loaded.restore_redacted_secrets(live) + @pytest.mark.parametrize( "environ, expected_exception, expected_message", ( From e8855ab108303460454c193d354ceb893024791a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Thu, 26 Mar 2026 18:55:23 -0400 Subject: [PATCH 04/11] Make sure to not mutate the api keys in memory --- src/eva/models/config.py | 2 ++ tests/unit/models/test_config_models.py | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index 6e6a4ce7..e0dac293 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -12,6 +12,7 @@ ``RunConfig(_env_file=".env", _cli_parse_args=True)``. 
""" +import copy import logging from datetime import UTC, datetime from pathlib import Path @@ -527,6 +528,7 @@ def _redact_model_list(cls, deployments: list[ModelDeployment]) -> list[dict]: """Redact secret values in litellm_params when serializing.""" redacted = [] for deployment in deployments: + deployment = copy.deepcopy(deployment) if "litellm_params" in deployment: params = deployment["litellm_params"] for key in params: diff --git a/tests/unit/models/test_config_models.py b/tests/unit/models/test_config_models.py index 69e5fbd2..079ce7e6 100644 --- a/tests/unit/models/test_config_models.py +++ b/tests/unit/models/test_config_models.py @@ -174,6 +174,17 @@ def test_secrets_redacted(self): assert dumped["model"]["stt_params"]["model"] == "nova-2" assert dumped["model"]["tts_params"]["model"] == "sonic" + def test_secrets_redaction_does_not_mutate_live_config(self): + """Serializing must not corrupt the in-memory config objects.""" + config = _config(env_vars=_BASE_ENV) + config.model_dump(mode="json") + # model_list keys must still hold real values + assert config.model_list[0]["litellm_params"]["api_key"] == "must_be_redacted" + assert config.model_list[1]["litellm_params"]["vertex_credentials"] == "must_be_redacted" + # STT/TTS params must still hold real values + assert config.model.stt_params["api_key"] == "test_key" + assert config.model.tts_params["api_key"] == "test_key" + def test_restore_redacted_secrets(self): """Redacted secrets are restored from a live config.""" config = _config(env_vars=_BASE_ENV) From 0f3a04b2c55e301c2217e90938d05a77fc007fd5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Thu, 26 Mar 2026 19:13:58 -0400 Subject: [PATCH 05/11] Use same strategy for litellm --- src/eva/models/config.py | 41 ++++++++++---- tests/unit/models/test_config_models.py | 72 ++++++++++++++++++------- 2 files changed, 83 insertions(+), 30 deletions(-) diff --git a/src/eva/models/config.py b/src/eva/models/config.py 
index e0dac293..c7a67f8e 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -550,13 +550,15 @@ def _redact_model_params(cls, model: ModelConfigUnion) -> dict: return data def restore_redacted_secrets(self, live: "RunConfig") -> None: - """Replace redacted ``***`` values in ``*_params`` dicts with real values from *live* config. + """Replace ``***`` values in this config with real values from *live*. + + Covers both ``model.*_params`` (STT/TTS/S2S/AudioLLM secrets) and + ``model_list[].litellm_params`` (LLM deployment secrets). Raises: - ValueError: If the saved and live configs use different providers or aliases - for any service that has redacted secrets. + ValueError: If provider or alias differs for a service with redacted secrets. """ - # Map each params field to its provider field (e.g. stt_params -> stt) + # ── model.*_params (STT / TTS / S2S / AudioLLM) ── _PARAMS_TO_PROVIDER = { "stt_params": "stt", "tts_params": "tts", @@ -568,11 +570,9 @@ def restore_redacted_secrets(self, live: "RunConfig") -> None: source = getattr(live.model, params_field, None) if not isinstance(saved, dict) or not isinstance(source, dict): continue - has_redacted = any(v == "***" for v in saved.values()) - if not has_redacted: + if not any(v == "***" for v in saved.values()): continue - # Check provider matches (e.g. 
stt: "deepgram" vs "cartesia") saved_provider = getattr(self.model, provider_field, None) live_provider = getattr(live.model, provider_field, None) if saved_provider != live_provider: @@ -581,7 +581,6 @@ def restore_redacted_secrets(self, live: "RunConfig") -> None: f"but current environment has {provider_field}={live_provider!r}" ) - # Check alias matches (strict — aliases identify a specific configuration) saved_alias = saved.get("alias") live_alias = source.get("alias") if saved_alias and live_alias and saved_alias != live_alias: @@ -590,18 +589,40 @@ def restore_redacted_secrets(self, live: "RunConfig") -> None: f"but current environment has {params_field}[alias]={live_alias!r}" ) - # Warn if model changed (non-fatal — models can be updated) saved_model = saved.get("model") live_model = source.get("model") if saved_model and live_model and saved_model != live_model: logger.warning( - f"Model mismatch for {params_field}: saved {saved_model!r}, current environment has {live_model!r}" + "Model mismatch for %s: saved %r, current environment has %r", + params_field, + saved_model, + live_model, ) for key, value in saved.items(): if value == "***" and key in source: saved[key] = source[key] + # ── model_list[].litellm_params (LLM deployments) ── + live_by_name = {d["model_name"]: d for d in live.model_list if "model_name" in d} + for deployment in self.model_list: + name = deployment.get("model_name") + if not name: + continue + saved_params = deployment.get("litellm_params", {}) + has_redacted = any(v == "***" for v in saved_params.values()) + if not has_redacted: + continue + if name not in live_by_name: + raise ValueError( + f"Cannot restore secrets: deployment {name!r} not found in " + f"current EVA_MODEL_LIST (available: {list(live_by_name)})" + ) + live_params = live_by_name[name].get("litellm_params", {}) + for key, value in saved_params.items(): + if value == "***" and key in live_params: + saved_params[key] = live_params[key] + @classmethod def 
from_yaml(cls, path: Path | str) -> "RunConfig": """Load configuration from YAML file.""" diff --git a/tests/unit/models/test_config_models.py b/tests/unit/models/test_config_models.py index 079ce7e6..7b00573e 100644 --- a/tests/unit/models/test_config_models.py +++ b/tests/unit/models/test_config_models.py @@ -186,20 +186,29 @@ def test_secrets_redaction_does_not_mutate_live_config(self): assert config.model.tts_params["api_key"] == "test_key" def test_restore_redacted_secrets(self): - """Redacted secrets are restored from a live config.""" + """Redacted secrets are restored from a live config for both model and model_list.""" config = _config(env_vars=_BASE_ENV) - # Simulate round-trip through config.json (redacted on dump, loaded back) dumped_json = config.model_dump_json() loaded = RunConfig.model_validate_json(dumped_json) + + # Everything is redacted after round-trip assert loaded.model.stt_params["api_key"] == "***" assert loaded.model.tts_params["api_key"] == "***" + assert loaded.model_list[0]["litellm_params"]["api_key"] == "***" + assert loaded.model_list[1]["litellm_params"]["vertex_credentials"] == "***" + assert loaded.model_list[2]["litellm_params"]["aws_access_key_id"] == "***" - # Restore from live config (which has real keys from env) loaded.restore_redacted_secrets(config) + + # STT/TTS params restored assert loaded.model.stt_params["api_key"] == "test_key" assert loaded.model.tts_params["api_key"] == "test_key" - # Non-secret fields unchanged assert loaded.model.stt_params["model"] == "nova-2" + # model_list restored + assert loaded.model_list[0]["litellm_params"]["api_key"] == "must_be_redacted" + assert loaded.model_list[1]["litellm_params"]["vertex_credentials"] == "must_be_redacted" + assert loaded.model_list[2]["litellm_params"]["aws_access_key_id"] == "must_be_redacted" + assert loaded.model_list[2]["litellm_params"]["aws_secret_access_key"] == "must_be_redacted" def test_restore_redacted_secrets_provider_mismatch(self): """Restoring 
secrets fails if the STT/TTS provider changed.""" @@ -207,7 +216,6 @@ def test_restore_redacted_secrets_provider_mismatch(self): dumped_json = config.model_dump_json() loaded = RunConfig.model_validate_json(dumped_json) - # Live config uses a different STT provider live = _config( env_vars=_BASE_ENV | { @@ -218,21 +226,6 @@ def test_restore_redacted_secrets_provider_mismatch(self): with pytest.raises(ValueError, match=r"saved stt='deepgram'.*current environment has stt='openai_whisper'"): loaded.restore_redacted_secrets(live) - def test_restore_redacted_secrets_model_mismatch_warns(self, caplog): - """Restoring secrets warns (but succeeds) if the STT/TTS model changed.""" - config = _config(env_vars=_BASE_ENV) - dumped_json = config.model_dump_json() - loaded = RunConfig.model_validate_json(dumped_json) - - # Same provider, different model - live = _config(env_vars=_BASE_ENV | {"EVA_MODEL__TTS_PARAMS": json.dumps({"api_key": "k", "model": "sonic-2"})}) - with caplog.at_level("WARNING", logger="eva.models.config"): - loaded.restore_redacted_secrets(live) - assert "sonic" in caplog.text - assert "sonic-2" in caplog.text - # Secrets still restored despite the warning - assert loaded.model.tts_params["api_key"] == "k" - def test_restore_redacted_secrets_alias_mismatch(self): """Restoring secrets fails if the alias changed.""" config = _config( @@ -256,6 +249,45 @@ def test_restore_redacted_secrets_alias_mismatch(self): ): loaded.restore_redacted_secrets(live) + def test_restore_redacted_secrets_model_mismatch_warns(self, caplog): + """Restoring secrets warns (but succeeds) if the STT/TTS model changed.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded = RunConfig.model_validate_json(dumped_json) + + live = _config(env_vars=_BASE_ENV | {"EVA_MODEL__TTS_PARAMS": json.dumps({"api_key": "k", "model": "sonic-2"})}) + with caplog.at_level("WARNING", logger="eva.models.config"): + loaded.restore_redacted_secrets(live) + assert "sonic" 
in caplog.text + assert "sonic-2" in caplog.text + assert loaded.model.tts_params["api_key"] == "k" + + def test_restore_redacted_secrets_llm_deployment_mismatch(self): + """Restoring secrets fails if a saved LLM deployment is missing from the live model_list.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded = RunConfig.model_validate_json(dumped_json) + + # Live config has a different model_list (only one deployment, different name) + different_model_list = [ + { + "model_name": "gpt-4o", + "litellm_params": {"model": "openai/gpt-4o", "api_key": "real_key"}, + } + ] + live = _config( + env_vars={ + "EVA_MODEL_LIST": json.dumps(different_model_list), + "EVA_MODEL__LLM": "gpt-4o", + "EVA_MODEL__STT": "deepgram", + "EVA_MODEL__TTS": "cartesia", + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2"}), + "EVA_MODEL__TTS_PARAMS": json.dumps({"api_key": "k", "model": "sonic"}), + } + ) + with pytest.raises(ValueError, match=r"deployment 'gpt-5.2' not found in current EVA_MODEL_LIST"): + loaded.restore_redacted_secrets(live) + @pytest.mark.parametrize( "environ, expected_exception, expected_message", ( From 57299567a7074a3d4c57045c3d7df5831e7107d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Mon, 30 Mar 2026 16:58:21 -0400 Subject: [PATCH 06/11] Refactor pipeline name --- src/eva/models/config.py | 49 +++++++++++++++++++--------------- src/eva/orchestrator/runner.py | 4 ++- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index c7a67f8e..7cac90c4 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -43,24 +43,9 @@ def current_date_and_time(): return f"{datetime.now(UTC):%Y-%m-%d_%H-%M-%S.%f}" -def _model_suffix(model: Any) -> str: - """Build a short suffix from the model config for use in folder names.""" - if isinstance(model, PipelineConfig): - parts = [ - 
model.stt_params.get("alias") or model.stt_params.get("model") or model.stt or "", - model.llm, - model.tts_params.get("alias") or model.tts_params.get("model") or model.tts or "", - ] - elif isinstance(model, SpeechToSpeechConfig): - parts = [model.s2s_params.get("alias") or model.s2s_params.get("model") or model.s2s] - elif isinstance(model, AudioLLMConfig): - parts = [ - model.audio_llm_params.get("alias") or model.audio_llm_params.get("model") or model.audio_llm, - model.tts_params.get("alias") or model.tts_params.get("model") or model.tts or "", - ] - else: - return "" - return "_".join(p for p in parts if p) +def _param_alias(params: dict[str, Any]) -> str: + """Return the display alias from a params dict.""" + return params.get("alias") or params.get("model") or "" class PipelineConfig(BaseModel): @@ -97,6 +82,16 @@ class PipelineConfig(BaseModel): ), ) + @property + def pipeline_name(self) -> str: + """Short name for use in folder names.""" + parts = [ + _param_alias(self.stt_params) or self.stt or "", + self.llm, + _param_alias(self.tts_params) or self.tts or "", + ] + return "_".join(p for p in parts if p) + @model_validator(mode="before") @classmethod def _migrate_legacy_fields(cls, data: Any) -> Any: @@ -121,6 +116,11 @@ class SpeechToSpeechConfig(BaseModel): s2s: str = Field(description="Speech-to-speech model name", examples=["gpt-realtime-mini", "gemini_live"]) s2s_params: dict[str, Any] = Field({}, description="Additional speech-to-speech model parameters (JSON)") + @property + def pipeline_name(self) -> str: + """Short name for use in folder names.""" + return _param_alias(self.s2s_params) or self.s2s + class AudioLLMConfig(BaseModel): """Configuration for an Audio-LLM pipeline (audio in, text out, separate TTS). 
@@ -142,6 +142,15 @@ class AudioLLMConfig(BaseModel): tts: str | None = Field(None, description="TTS model", examples=["cartesia", "elevenlabs"]) tts_params: dict[str, Any] = Field({}, description="Additional TTS model parameters (JSON)") + @property + def pipeline_name(self) -> str: + """Short name for use in folder names.""" + parts = [ + _param_alias(self.audio_llm_params) or self.audio_llm, + _param_alias(self.tts_params) or self.tts or "", + ] + return "_".join(p for p in parts if p) + _PIPELINE_FIELDS = { "llm", @@ -479,9 +488,7 @@ def _check_companion_services(self) -> "RunConfig": # Append model names to auto-generated run_id if "run_id" not in self.model_fields_set: - suffix = _model_suffix(self.model) - if suffix: - self.run_id = f"{self.run_id}_{suffix}" + self.run_id = f"{self.run_id}_{self.model.pipeline_name}" return self diff --git a/src/eva/orchestrator/runner.py b/src/eva/orchestrator/runner.py index f92d98af..6507dace 100644 --- a/src/eva/orchestrator/runner.py +++ b/src/eva/orchestrator/runner.py @@ -138,7 +138,9 @@ async def run(self, records: list[EvaluationRecord]) -> RunResult: } config_path = self.output_dir / "config.json" - config_path.write_text(self.config.model_dump_json(indent=2)) + config_data = self.config.model_dump(mode="json") + config_data["pipeline_name"] = self.config.model.pipeline_name + config_path.write_text(json.dumps(config_data, indent=2)) # Build output_id list for tracking (supports pass@k) num_trials = self.config.num_trials From a10e2cbe5ee311f1398eddd70736a948dcc81090 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Mon, 30 Mar 2026 17:04:08 -0400 Subject: [PATCH 07/11] Saving parts rather than name --- src/eva/models/config.py | 37 +++++++++++++++++----------------- src/eva/orchestrator/runner.py | 3 ++- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index 7cac90c4..2ed25490 100644 --- 
a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -83,14 +83,13 @@ class PipelineConfig(BaseModel): ) @property - def pipeline_name(self) -> str: - """Short name for use in folder names.""" - parts = [ - _param_alias(self.stt_params) or self.stt or "", - self.llm, - _param_alias(self.tts_params) or self.tts or "", - ] - return "_".join(p for p in parts if p) + def pipeline_parts(self) -> dict[str, str]: + """Component names for this pipeline.""" + return { + "stt": _param_alias(self.stt_params) or self.stt or "", + "llm": self.llm, + "tts": _param_alias(self.tts_params) or self.tts or "", + } @model_validator(mode="before") @classmethod @@ -117,9 +116,9 @@ class SpeechToSpeechConfig(BaseModel): s2s_params: dict[str, Any] = Field({}, description="Additional speech-to-speech model parameters (JSON)") @property - def pipeline_name(self) -> str: - """Short name for use in folder names.""" - return _param_alias(self.s2s_params) or self.s2s + def pipeline_parts(self) -> dict[str, str]: + """Component names for this pipeline.""" + return {"s2s": _param_alias(self.s2s_params) or self.s2s} class AudioLLMConfig(BaseModel): @@ -143,13 +142,12 @@ class AudioLLMConfig(BaseModel): tts_params: dict[str, Any] = Field({}, description="Additional TTS model parameters (JSON)") @property - def pipeline_name(self) -> str: - """Short name for use in folder names.""" - parts = [ - _param_alias(self.audio_llm_params) or self.audio_llm, - _param_alias(self.tts_params) or self.tts or "", - ] - return "_".join(p for p in parts if p) + def pipeline_parts(self) -> dict[str, str]: + """Component names for this pipeline.""" + return { + "audio_llm": _param_alias(self.audio_llm_params) or self.audio_llm, + "tts": _param_alias(self.tts_params) or self.tts or "", + } _PIPELINE_FIELDS = { @@ -488,7 +486,8 @@ def _check_companion_services(self) -> "RunConfig": # Append model names to auto-generated run_id if "run_id" not in self.model_fields_set: - self.run_id = 
f"{self.run_id}_{self.model.pipeline_name}" + suffix = "_".join(v for v in self.model.pipeline_parts.values() if v) + self.run_id = f"{self.run_id}_{suffix}" return self diff --git a/src/eva/orchestrator/runner.py b/src/eva/orchestrator/runner.py index 6507dace..ac5a45f3 100644 --- a/src/eva/orchestrator/runner.py +++ b/src/eva/orchestrator/runner.py @@ -139,7 +139,8 @@ async def run(self, records: list[EvaluationRecord]) -> RunResult: config_path = self.output_dir / "config.json" config_data = self.config.model_dump(mode="json") - config_data["pipeline_name"] = self.config.model.pipeline_name + pipeline_parts = self.config.model.pipeline_parts + config_data["pipeline_parts"] = pipeline_parts config_path.write_text(json.dumps(config_data, indent=2)) # Build output_id list for tracking (supports pass@k) From 98c6bbf2e63a95d04a86d7856f13dce5cdcc1a1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Tue, 31 Mar 2026 13:50:28 -0400 Subject: [PATCH 08/11] Read url from .env file and rename --- src/eva/models/config.py | 120 +++++++++++++++--------- src/eva/run_benchmark.py | 4 +- tests/unit/models/test_config_models.py | 56 +++++++++-- 3 files changed, 125 insertions(+), 55 deletions(-) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index 2ed25490..34ac1ae6 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -300,6 +300,21 @@ class RunConfig(BaseSettings): "EVA_METRICS_TO_RUN": "EVA_METRICS", } + # Providers that manage their own model/key resolution (e.g. 
WebSocket-based) + _SKIP_PARAMS_VALIDATION: ClassVar[set[str]] = {"nvidia"} + + # Maps *_params field names to their provider field for env override logic + _PARAMS_TO_PROVIDER: ClassVar[dict[str, str]] = { + "stt_params": "stt", + "tts_params": "tts", + "s2s_params": "s2s", + "audio_llm_params": "audio_llm", + } + # Keys always read from the live environment (not persisted across runs) + _ENV_OVERRIDE_KEYS: ClassVar[set[str]] = {"url", "urls"} + # Substrings that identify secret keys (redacted in logs and config.json) + _SECRET_KEY_PATTERNS: ClassVar[set[str]] = {"key", "credentials", "secret"} + class ModelDeployment(DeploymentTypedDict): """DeploymentTypedDict that preserves extra keys in litellm_params.""" @@ -491,9 +506,6 @@ def _check_companion_services(self) -> "RunConfig": return self - # Providers that manage their own model/key resolution (e.g. WebSocket-based) - _SKIP_PARAMS_VALIDATION: ClassVar[set[str]] = {"nvidia"} - @classmethod def _validate_service_params(cls, service: str, provider: str, params: dict[str, Any]) -> None: """Validate that STT/TTS params contain required keys.""" @@ -555,58 +567,65 @@ def _redact_model_params(cls, model: ModelConfigUnion) -> dict: value[key] = "***" return data - def restore_redacted_secrets(self, live: "RunConfig") -> None: - """Replace ``***`` values in this config with real values from *live*. + def apply_env_overrides(self, live: "RunConfig") -> None: + """Apply environment-dependent values from *live* config onto this (saved) config. - Covers both ``model.*_params`` (STT/TTS/S2S/AudioLLM secrets) and - ``model_list[].litellm_params`` (LLM deployment secrets). + Restores redacted secrets (``***``) and overrides dynamic fields (``url``, + ``urls``) in ``model.*_params`` and ``model_list[].litellm_params``. Raises: ValueError: If provider or alias differs for a service with redacted secrets. 
""" # ── model.*_params (STT / TTS / S2S / AudioLLM) ── - _PARAMS_TO_PROVIDER = { - "stt_params": "stt", - "tts_params": "tts", - "s2s_params": "s2s", - "audio_llm_params": "audio_llm", - } - for params_field, provider_field in _PARAMS_TO_PROVIDER.items(): + for params_field, provider_field in self._PARAMS_TO_PROVIDER.items(): saved = getattr(self.model, params_field, None) source = getattr(live.model, params_field, None) if not isinstance(saved, dict) or not isinstance(source, dict): continue - if not any(v == "***" for v in saved.values()): - continue - - saved_provider = getattr(self.model, provider_field, None) - live_provider = getattr(live.model, provider_field, None) - if saved_provider != live_provider: - raise ValueError( - f"Cannot restore secrets: saved {provider_field}={saved_provider!r} " - f"but current environment has {provider_field}={live_provider!r}" - ) - - saved_alias = saved.get("alias") - live_alias = source.get("alias") - if saved_alias and live_alias and saved_alias != live_alias: - raise ValueError( - f"Cannot restore secrets: saved {params_field}[alias]={saved_alias!r} " - f"but current environment has {params_field}[alias]={live_alias!r}" - ) - saved_model = saved.get("model") - live_model = source.get("model") - if saved_model and live_model and saved_model != live_model: - logger.warning( - "Model mismatch for %s: saved %r, current environment has %r", - params_field, - saved_model, - live_model, - ) + has_redacted = any(v == "***" for v in saved.values()) + has_env_overrides = any(k in saved or k in source for k in self._ENV_OVERRIDE_KEYS) + if not has_redacted and not has_env_overrides: + continue - for key, value in saved.items(): - if value == "***" and key in source: + if has_redacted: + saved_provider = getattr(self.model, provider_field, None) + live_provider = getattr(live.model, provider_field, None) + if saved_provider != live_provider: + raise ValueError( + f"Cannot restore secrets: saved {provider_field}={saved_provider!r} " 
+ f"but current environment has {provider_field}={live_provider!r}" + ) + + saved_alias = saved.get("alias") + live_alias = source.get("alias") + if saved_alias and live_alias and saved_alias != live_alias: + raise ValueError( + f"Cannot restore secrets: saved {params_field}[alias]={saved_alias!r} " + f"but current environment has {params_field}[alias]={live_alias!r}" + ) + + saved_model = saved.get("model") + live_model = source.get("model") + if saved_model and live_model and saved_model != live_model: + logger.warning( + f"Model mismatch for {params_field}: saved {saved_model!r}, " + f"current environment has {live_model!r}" + ) + + for key, value in saved.items(): + if value == "***" and key in source: + saved[key] = source[key] + + # Always use url/urls from the live environment + for key in self._ENV_OVERRIDE_KEYS: + if key in source: + saved_val = saved.get(key) + if saved_val and saved_val != source[key]: + logger.warning( + f"{params_field}[{key}] differs: saved {saved_val!r}, " + f"using {source[key]!r} from current environment" + ) saved[key] = source[key] # ── model_list[].litellm_params (LLM deployments) ── @@ -629,6 +648,21 @@ def restore_redacted_secrets(self, live: "RunConfig") -> None: if value == "***" and key in live_params: saved_params[key] = live_params[key] + # ── Log resolved configuration ── + def _safe_params(p: dict) -> dict: + return {k: "***" if any(s in k for s in self._SECRET_KEY_PATTERNS) else v for k, v in p.items()} + + for params_field, provider_field in self._PARAMS_TO_PROVIDER.items(): + params = getattr(self.model, params_field, None) + provider = getattr(self.model, provider_field, None) + if isinstance(params, dict) and params: + logger.info(f"Resolved {provider_field} ({provider}): {_safe_params(params)}") + + for deployment in self.model_list: + name = deployment.get("model_name", "?") + params = deployment.get("litellm_params", {}) + logger.info(f"Resolved deployment {name}: {_safe_params(params)}") + @classmethod def 
from_yaml(cls, path: Path | str) -> "RunConfig": """Load configuration from YAML file.""" diff --git a/src/eva/run_benchmark.py b/src/eva/run_benchmark.py index 78a66843..49096448 100644 --- a/src/eva/run_benchmark.py +++ b/src/eva/run_benchmark.py @@ -42,8 +42,8 @@ async def run_benchmark(config: RunConfig) -> int: logger.error(str(e)) return 1 - # Restore secrets redacted in config.json with live env values - runner.config.restore_redacted_secrets(config) + # Apply env-dependent values (secrets, urls) from live env onto saved config + runner.config.apply_env_overrides(config) # Apply CLI overrides runner.config.max_rerun_attempts = config.max_rerun_attempts diff --git a/tests/unit/models/test_config_models.py b/tests/unit/models/test_config_models.py index 7b00573e..4a2c22e4 100644 --- a/tests/unit/models/test_config_models.py +++ b/tests/unit/models/test_config_models.py @@ -185,7 +185,7 @@ def test_secrets_redaction_does_not_mutate_live_config(self): assert config.model.stt_params["api_key"] == "test_key" assert config.model.tts_params["api_key"] == "test_key" - def test_restore_redacted_secrets(self): + def test_apply_env_overrides(self): """Redacted secrets are restored from a live config for both model and model_list.""" config = _config(env_vars=_BASE_ENV) dumped_json = config.model_dump_json() @@ -198,7 +198,7 @@ def test_restore_redacted_secrets(self): assert loaded.model_list[1]["litellm_params"]["vertex_credentials"] == "***" assert loaded.model_list[2]["litellm_params"]["aws_access_key_id"] == "***" - loaded.restore_redacted_secrets(config) + loaded.apply_env_overrides(config) # STT/TTS params restored assert loaded.model.stt_params["api_key"] == "test_key" @@ -210,7 +210,7 @@ def test_restore_redacted_secrets(self): assert loaded.model_list[2]["litellm_params"]["aws_access_key_id"] == "must_be_redacted" assert loaded.model_list[2]["litellm_params"]["aws_secret_access_key"] == "must_be_redacted" - def 
test_restore_redacted_secrets_provider_mismatch(self): + def test_apply_env_overrides_provider_mismatch(self): """Restoring secrets fails if the STT/TTS provider changed.""" config = _config(env_vars=_BASE_ENV) dumped_json = config.model_dump_json() @@ -224,9 +224,9 @@ def test_restore_redacted_secrets_provider_mismatch(self): } ) with pytest.raises(ValueError, match=r"saved stt='deepgram'.*current environment has stt='openai_whisper'"): - loaded.restore_redacted_secrets(live) + loaded.apply_env_overrides(live) - def test_restore_redacted_secrets_alias_mismatch(self): + def test_apply_env_overrides_alias_mismatch(self): """Restoring secrets fails if the alias changed.""" config = _config( env_vars=_BASE_ENV @@ -247,9 +247,9 @@ def test_restore_redacted_secrets_alias_mismatch(self): ValueError, match=r"saved stt_params\[alias\]='stt-v1'.*current environment has stt_params\[alias\]='stt-v2'", ): - loaded.restore_redacted_secrets(live) + loaded.apply_env_overrides(live) - def test_restore_redacted_secrets_model_mismatch_warns(self, caplog): + def test_apply_env_overrides_model_mismatch_warns(self, caplog): """Restoring secrets warns (but succeeds) if the STT/TTS model changed.""" config = _config(env_vars=_BASE_ENV) dumped_json = config.model_dump_json() @@ -257,12 +257,48 @@ def test_restore_redacted_secrets_model_mismatch_warns(self, caplog): live = _config(env_vars=_BASE_ENV | {"EVA_MODEL__TTS_PARAMS": json.dumps({"api_key": "k", "model": "sonic-2"})}) with caplog.at_level("WARNING", logger="eva.models.config"): - loaded.restore_redacted_secrets(live) + loaded.apply_env_overrides(live) assert "sonic" in caplog.text assert "sonic-2" in caplog.text assert loaded.model.tts_params["api_key"] == "k" - def test_restore_redacted_secrets_llm_deployment_mismatch(self): + def test_apply_env_overrides_url_from_env(self, caplog): + """Url is always taken from the live env, with a warning if it differs.""" + saved_env = _BASE_ENV | { + "EVA_MODEL__STT_PARAMS": 
json.dumps({"api_key": "k", "model": "nova-2", "url": "wss://old-host/stt"}), + } + config = _config(env_vars=saved_env) + dumped_json = config.model_dump_json() + loaded = RunConfig.model_validate_json(dumped_json) + + # Live env has a different url + live_env = _BASE_ENV | { + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2", "url": "wss://new-host/stt"}), + } + live = _config(env_vars=live_env) + + with caplog.at_level("WARNING", logger="eva.models.config"): + loaded.apply_env_overrides(live) + + assert loaded.model.stt_params["url"] == "wss://new-host/stt" + assert "wss://old-host/stt" in caplog.text + assert "wss://new-host/stt" in caplog.text + + def test_apply_env_overrides_url_added_from_env(self): + """Url from live env is added even if the saved config didn't have one.""" + config = _config(env_vars=_BASE_ENV) + dumped_json = config.model_dump_json() + loaded = RunConfig.model_validate_json(dumped_json) + + live_env = _BASE_ENV | { + "EVA_MODEL__STT_PARAMS": json.dumps({"api_key": "k", "model": "nova-2", "url": "wss://new-host/stt"}), + } + live = _config(env_vars=live_env) + loaded.apply_env_overrides(live) + + assert loaded.model.stt_params["url"] == "wss://new-host/stt" + + def test_apply_env_overrides_llm_deployment_mismatch(self): """Restoring secrets fails if a saved LLM deployment is missing from the live model_list.""" config = _config(env_vars=_BASE_ENV) dumped_json = config.model_dump_json() @@ -286,7 +322,7 @@ def test_restore_redacted_secrets_llm_deployment_mismatch(self): } ) with pytest.raises(ValueError, match=r"deployment 'gpt-5.2' not found in current EVA_MODEL_LIST"): - loaded.restore_redacted_secrets(live) + loaded.apply_env_overrides(live) @pytest.mark.parametrize( "environ, expected_exception, expected_message", From d366872fa694814a7b0f0f822448442753bbb982 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Tue, 31 Mar 2026 14:44:56 -0400 Subject: [PATCH 09/11] Address feedback
--- src/eva/models/config.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index 34ac1ae6..a5022341 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -540,6 +540,16 @@ def _expand_metrics_all(cls, v: list[str] | None) -> list[str] | None: return [m for m in get_global_registry().list_metrics() if m not in cls._VALIDATION_METRIC_NAMES] return v + @classmethod + def _is_secret_key(cls, key: str) -> bool: + """Return True if *key* matches any pattern in _SECRET_KEY_PATTERNS.""" + return any(pattern in key for pattern in cls._SECRET_KEY_PATTERNS) + + @classmethod + def _redact_dict(cls, params: dict) -> dict: + """Return a copy of *params* with secret values replaced by ``***``.""" + return {k: "***" if cls._is_secret_key(k) else v for k, v in params.items()} + @field_serializer("model_list") @classmethod def _redact_model_list(cls, deployments: list[ModelDeployment]) -> list[dict]: @@ -548,10 +558,7 @@ def _redact_model_list(cls, deployments: list[ModelDeployment]) -> list[dict]: for deployment in deployments: deployment = copy.deepcopy(deployment) if "litellm_params" in deployment: - params = deployment["litellm_params"] - for key in params: - if "key" in key or "credentials" in key: - params[key] = "***" + deployment["litellm_params"] = cls._redact_dict(deployment["litellm_params"]) redacted.append(deployment) return redacted @@ -562,9 +569,7 @@ def _redact_model_params(cls, model: ModelConfigUnion) -> dict: data = model.model_dump(mode="json") for field_name, value in data.items(): if field_name.endswith("_params") and isinstance(value, dict): - for key in value: - if "key" in key or "credentials" in key: - value[key] = "***" + data[field_name] = cls._redact_dict(value) return data def apply_env_overrides(self, live: "RunConfig") -> None: @@ -649,19 +654,16 @@ def apply_env_overrides(self, live: "RunConfig") -> None: saved_params[key] = 
live_params[key] # ── Log resolved configuration ── - def _safe_params(p: dict) -> dict: - return {k: "***" if any(s in k for s in self._SECRET_KEY_PATTERNS) else v for k, v in p.items()} - for params_field, provider_field in self._PARAMS_TO_PROVIDER.items(): params = getattr(self.model, params_field, None) provider = getattr(self.model, provider_field, None) if isinstance(params, dict) and params: - logger.info(f"Resolved {provider_field} ({provider}): {_safe_params(params)}") + logger.info(f"Resolved {provider_field} ({provider}): {self._redact_dict(params)}") for deployment in self.model_list: name = deployment.get("model_name", "?") params = deployment.get("litellm_params", {}) - logger.info(f"Resolved deployment {name}: {_safe_params(params)}") + logger.info(f"Resolved deployment {name}: {self._redact_dict(params)}") @classmethod def from_yaml(cls, path: Path | str) -> "RunConfig": From 86adbc36692bf90f1b396dc31f9bff38253d0694 Mon Sep 17 00:00:00 2001 From: "joseph.marinier" Date: Tue, 31 Mar 2026 16:08:11 -0400 Subject: [PATCH 10/11] Explain `run_id` default value in `eva --help` --- src/eva/models/config.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index a5022341..6022829b 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -39,10 +39,6 @@ logger = logging.getLogger(__name__) -def current_date_and_time(): - return f"{datetime.now(UTC):%Y-%m-%d_%H-%M-%S.%f}" - - def _param_alias(params: dict[str, Any]) -> str: """Return the display alias from a params dict.""" return params.get("alias") or params.get("model") or "" @@ -329,7 +325,7 @@ class ModelDeployment(DeploymentTypedDict): # Run identifier run_id: str = Field( - default_factory=current_date_and_time, + "timestamp and model name(s)", # Overwritten by _set_default_run_id() description="Run identifier, auto-generated if not provided", ) @@ -498,12 +494,13 @@ def _check_companion_services(self) ->
"RunConfig": if not self.model.tts: raise ValueError("EVA_MODEL__TTS is required when using EVA_MODEL__AUDIO_LLM (SpeechLM-TTS pipeline).") self._validate_service_params("TTS", self.model.tts, self.model.tts_params) + return self - # Append model names to auto-generated run_id + @model_validator(mode="after") + def _set_default_run_id(self) -> "RunConfig": if "run_id" not in self.model_fields_set: suffix = "_".join(v for v in self.model.pipeline_parts.values() if v) - self.run_id = f"{self.run_id}_{suffix}" - + self.run_id = f"{datetime.now(UTC):%Y-%m-%d_%H-%M-%S.%f}_{suffix}" return self @classmethod From 96674b9253319f7ba128ebe5b178e0b67d317111 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabrielle=20Gauthier=20Melanc=CC=A7on?= Date: Wed, 1 Apr 2026 17:23:52 -0400 Subject: [PATCH 11/11] Address feedback --- src/eva/models/config.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/eva/models/config.py b/src/eva/models/config.py index 6022829b..c37675c9 100644 --- a/src/eva/models/config.py +++ b/src/eva/models/config.py @@ -41,7 +41,7 @@ def _param_alias(params: dict[str, Any]) -> str: """Return the display alias from a params dict.""" - return params.get("alias") or params.get("model") or "" + return params.get("alias") or params["model"] class PipelineConfig(BaseModel): @@ -82,9 +82,9 @@ class PipelineConfig(BaseModel): def pipeline_parts(self) -> dict[str, str]: """Component names for this pipeline.""" return { - "stt": _param_alias(self.stt_params) or self.stt or "", + "stt": _param_alias(self.stt_params) or self.stt, "llm": self.llm, - "tts": _param_alias(self.tts_params) or self.tts or "", + "tts": _param_alias(self.tts_params) or self.tts, } @model_validator(mode="before") @@ -142,7 +142,7 @@ def pipeline_parts(self) -> dict[str, str]: """Component names for this pipeline.""" return { "audio_llm": _param_alias(self.audio_llm_params) or self.audio_llm, - "tts": _param_alias(self.tts_params) or self.tts or "", +
"tts": _param_alias(self.tts_params) or self.tts, } @@ -591,14 +591,6 @@ def apply_env_overrides(self, live: "RunConfig") -> None: continue if has_redacted: - saved_provider = getattr(self.model, provider_field, None) - live_provider = getattr(live.model, provider_field, None) - if saved_provider != live_provider: - raise ValueError( - f"Cannot restore secrets: saved {provider_field}={saved_provider!r} " - f"but current environment has {provider_field}={live_provider!r}" - ) - saved_alias = saved.get("alias") live_alias = source.get("alias") if saved_alias and live_alias and saved_alias != live_alias: @@ -607,6 +599,14 @@ def apply_env_overrides(self, live: "RunConfig") -> None: f"but current environment has {params_field}[alias]={live_alias!r}" ) + saved_provider = getattr(self.model, provider_field, None) + live_provider = getattr(live.model, provider_field, None) + if saved_provider != live_provider: + logger.warning( + f"Provider mismatch for {params_field}: saved {saved_provider!r}, " + f"current environment has {live_provider!r}" + ) + saved_model = saved.get("model") live_model = source.get("model") if saved_model and live_model and saved_model != live_model: