diff --git a/.github/run-eval/ADDINGMODEL.md b/.github/run-eval/ADDINGMODEL.md index 48f2d2db95..471a57744a 100644 --- a/.github/run-eval/ADDINGMODEL.md +++ b/.github/run-eval/ADDINGMODEL.md @@ -52,11 +52,20 @@ This file (`resolve_model_config.py`) defines models available for evaluation. M - `openhands-sdk/openhands/sdk/llm/utils/model_prompt_spec.py` - GPT models only (variant detection) - `openhands-sdk/openhands/sdk/llm/utils/verified_models.py` - Production-ready models - > ⚠️ **When editing `verified_models.py`**: If you add a model to `VERIFIED_OPENHANDS_MODELS`, - > you **must also** add it to its provider-specific list (e.g. `VERIFIED_ANTHROPIC_MODELS`, - > `VERIFIED_GEMINI_MODELS`, `VERIFIED_MOONSHOT_MODELS`, etc.). - > If no list exists for the provider yet, create one and add it to the `VERIFIED_MODELS` dict. - > This ensures the model appears under its actual provider in the UI, not just under "openhands". + > ⛔ **Do NOT add a model to `verified_models.py` unless explicitly asked to.** + > "Verified" means the model has been validated against the OpenHands integration + > test suite **and** an OpenHands maintainer has approved it for the production UI. + > A passing integration run is *necessary but not sufficient*. New models should be + > added to `MODELS` in `resolve_model_config.py` (and `model_features.py` if + > applicable) only — leave `verified_models.py` alone until a maintainer requests it + > in the PR. + > + > ⚠️ **When you are explicitly asked to edit `verified_models.py`**: If you add a + > model to `VERIFIED_OPENHANDS_MODELS`, you **must also** add it to its + > provider-specific list (e.g. `VERIFIED_ANTHROPIC_MODELS`, `VERIFIED_GEMINI_MODELS`, + > `VERIFIED_MOONSHOT_MODELS`, etc.). If no list exists for the provider yet, create + > one and add it to the `VERIFIED_MODELS` dict. This ensures the model appears under + > its actual provider in the UI, not just under "openhands". ## Step 1: Add to resolve_model_config.py diff --git a/.github/run-eval/resolve_model_config.py b/.github/run-eval/resolve_model_config.py index f7e1087af4..be2304a8fd 100755 --- a/.github/run-eval/resolve_model_config.py +++ b/.github/run-eval/resolve_model_config.py @@ -153,6 +153,14 @@ def _sigterm_handler(signum: int, _frame: object) -> None: "temperature": 0.0, }, }, + "gemini-3.5-flash": { + "id": "gemini-3.5-flash", + "display_name": "Gemini 3.5 Flash", + "llm_config": { + "model": "litellm_proxy/gemini-3.5-flash", + "temperature": 0.0, + }, + }, "gpt-5.2": { "id": "gpt-5.2", "display_name": "GPT-5.2", diff --git a/tests/cross/test_resolve_model_config.py b/tests/cross/test_resolve_model_config.py index ad383770be..25e25253cd 100644 --- a/tests/cross/test_resolve_model_config.py +++ b/tests/cross/test_resolve_model_config.py @@ -659,3 +659,13 @@ def test_deepseek_v4_flash_config(): assert model["id"] == "deepseek-v4-flash" assert model["display_name"] == "DeepSeek V4 Flash" assert model["llm_config"]["model"] == "litellm_proxy/deepseek/deepseek-v4-flash" + + +def test_gemini_3_5_flash_config(): + """Test that gemini-3.5-flash has correct configuration.""" + model = MODELS["gemini-3.5-flash"] + + assert model["id"] == "gemini-3.5-flash" + assert model["display_name"] == "Gemini 3.5 Flash" + assert model["llm_config"]["model"] == "litellm_proxy/gemini-3.5-flash" + assert model["llm_config"]["temperature"] == 0.0