From 5ca143c46cf435aae831d37ddf67acba7d3b837e Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 20 May 2026 02:52:08 +0000 Subject: [PATCH 1/2] Add gemini-3.5-flash model configuration Follows ADDINGMODEL.md to add the gemini-3.5-flash preview model released by Google. - Add gemini-3.5-flash entry to MODELS in resolve_model_config.py (litellm_proxy/gemini-3.5-flash-preview, temperature 0.0), following the existing gemini-3-flash / gemini-3.1-pro pattern. - Register gemini-3.5-flash and gemini-3.5-flash-preview in VERIFIED_GEMINI_MODELS and VERIFIED_OPENHANDS_MODELS so the model appears under the Gemini provider in the UI. - Add test_gemini_3_5_flash_config to tests/cross/test_resolve_model_config.py. PROMPT_CACHE_MODELS already covers any 'gemini-3' prefix, and reasoning_effort support is auto-detected from LiteLLM, so no changes are needed in model_features.py. Fixes #3312 Co-authored-by: openhands --- .github/run-eval/resolve_model_config.py | 8 ++++++++ .../openhands/sdk/llm/utils/verified_models.py | 4 ++++ tests/cross/test_resolve_model_config.py | 10 ++++++++++ 3 files changed, 22 insertions(+) diff --git a/.github/run-eval/resolve_model_config.py b/.github/run-eval/resolve_model_config.py index f7e1087af4..eed9810bbc 100755 --- a/.github/run-eval/resolve_model_config.py +++ b/.github/run-eval/resolve_model_config.py @@ -153,6 +153,14 @@ def _sigterm_handler(signum: int, _frame: object) -> None: "temperature": 0.0, }, }, + "gemini-3.5-flash": { + "id": "gemini-3.5-flash", + "display_name": "Gemini 3.5 Flash", + "llm_config": { + "model": "litellm_proxy/gemini-3.5-flash-preview", + "temperature": 0.0, + }, + }, "gpt-5.2": { "id": "gpt-5.2", "display_name": "GPT-5.2", diff --git a/openhands-sdk/openhands/sdk/llm/utils/verified_models.py b/openhands-sdk/openhands/sdk/llm/utils/verified_models.py index 9d53d84940..52078539f1 100644 --- a/openhands-sdk/openhands/sdk/llm/utils/verified_models.py +++ b/openhands-sdk/openhands/sdk/llm/utils/verified_models.py @@ -53,6 +53,8 @@ VERIFIED_GEMINI_MODELS = [ "gemini-3.1-pro-preview", "gemini-3.1-pro", + "gemini-3.5-flash-preview", + "gemini-3.5-flash", "gemini-3-flash", "gemini-3-pro", ] @@ -106,6 +108,8 @@ "minimax-m2.7", "gemini-3.1-pro", "gemini-3.1-pro-preview", + "gemini-3.5-flash", + "gemini-3.5-flash-preview", "gemini-3-flash", "gemini-3-pro", "deepseek-chat", diff --git a/tests/cross/test_resolve_model_config.py b/tests/cross/test_resolve_model_config.py index ad383770be..0219f82afa 100644 --- a/tests/cross/test_resolve_model_config.py +++ b/tests/cross/test_resolve_model_config.py @@ -659,3 +659,13 @@ def test_deepseek_v4_flash_config(): assert model["id"] == "deepseek-v4-flash" assert model["display_name"] == "DeepSeek V4 Flash" assert model["llm_config"]["model"] == "litellm_proxy/deepseek/deepseek-v4-flash" + + +def test_gemini_3_5_flash_config(): + """Test that gemini-3.5-flash has correct configuration.""" + model = MODELS["gemini-3.5-flash"] + + assert model["id"] == "gemini-3.5-flash" + assert model["display_name"] == "Gemini 3.5 Flash" + assert model["llm_config"]["model"] == "litellm_proxy/gemini-3.5-flash-preview" + assert model["llm_config"]["temperature"] == 0.0 From dcea291fe1e5bbd47386cb6bb80367caf14641d0 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 20 May 2026 03:23:06 +0000 Subject: [PATCH 2/2] Address review: drop -preview suffix and remove verified-list entries 1) Fix the LiteLLM model name: per the reviewer, the model is named 'gemini-3.5-flash', not 'gemini-3.5-flash-preview'. Update the entry in resolve_model_config.py and the matching assertion in tests/cross/test_resolve_model_config.py. 2) Remove gemini-3.5-flash{,-preview} from VERIFIED_GEMINI_MODELS and VERIFIED_OPENHANDS_MODELS. The model has not been verified yet -- integration tests have not passed -- so it does not belong in the verified list. 3) Update ADDINGMODEL.md with explicit guidance that models should not be added to verified_models.py unless explicitly asked for by a maintainer; passing integration tests is necessary but not sufficient. Co-authored-by: openhands --- .github/run-eval/ADDINGMODEL.md | 19 ++++++++++++++----- .github/run-eval/resolve_model_config.py | 2 +- .../sdk/llm/utils/verified_models.py | 4 ---- tests/cross/test_resolve_model_config.py | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/.github/run-eval/ADDINGMODEL.md b/.github/run-eval/ADDINGMODEL.md index 48f2d2db95..471a57744a 100644 --- a/.github/run-eval/ADDINGMODEL.md +++ b/.github/run-eval/ADDINGMODEL.md @@ -52,11 +52,20 @@ This file (`resolve_model_config.py`) defines models available for evaluation. M - `openhands-sdk/openhands/sdk/llm/utils/model_prompt_spec.py` - GPT models only (variant detection) - `openhands-sdk/openhands/sdk/llm/utils/verified_models.py` - Production-ready models - > ⚠️ **When editing `verified_models.py`**: If you add a model to `VERIFIED_OPENHANDS_MODELS`, - > you **must also** add it to its provider-specific list (e.g. `VERIFIED_ANTHROPIC_MODELS`, - > `VERIFIED_GEMINI_MODELS`, `VERIFIED_MOONSHOT_MODELS`, etc.). - > If no list exists for the provider yet, create one and add it to the `VERIFIED_MODELS` dict. - > This ensures the model appears under its actual provider in the UI, not just under "openhands". + > ⛔ **Do NOT add a model to `verified_models.py` unless explicitly asked to.** + > "Verified" means the model has been validated against the OpenHands integration + > test suite **and** an OpenHands maintainer has approved it for the production UI. + > A passing integration run is *necessary but not sufficient*. New models should be + > added to `MODELS` in `resolve_model_config.py` (and `model_features.py` if + > applicable) only — leave `verified_models.py` alone until a maintainer requests it + > in the PR. + > + > ⚠️ **When you are explicitly asked to edit `verified_models.py`**: If you add a + > model to `VERIFIED_OPENHANDS_MODELS`, you **must also** add it to its + > provider-specific list (e.g. `VERIFIED_ANTHROPIC_MODELS`, `VERIFIED_GEMINI_MODELS`, + > `VERIFIED_MOONSHOT_MODELS`, etc.). If no list exists for the provider yet, create + > one and add it to the `VERIFIED_MODELS` dict. This ensures the model appears under + > its actual provider in the UI, not just under "openhands". ## Step 1: Add to resolve_model_config.py diff --git a/.github/run-eval/resolve_model_config.py b/.github/run-eval/resolve_model_config.py index eed9810bbc..be2304a8fd 100755 --- a/.github/run-eval/resolve_model_config.py +++ b/.github/run-eval/resolve_model_config.py @@ -157,7 +157,7 @@ def _sigterm_handler(signum: int, _frame: object) -> None: "id": "gemini-3.5-flash", "display_name": "Gemini 3.5 Flash", "llm_config": { - "model": "litellm_proxy/gemini-3.5-flash-preview", + "model": "litellm_proxy/gemini-3.5-flash", "temperature": 0.0, }, }, diff --git a/openhands-sdk/openhands/sdk/llm/utils/verified_models.py b/openhands-sdk/openhands/sdk/llm/utils/verified_models.py index 52078539f1..9d53d84940 100644 --- a/openhands-sdk/openhands/sdk/llm/utils/verified_models.py +++ b/openhands-sdk/openhands/sdk/llm/utils/verified_models.py @@ -53,8 +53,6 @@ VERIFIED_GEMINI_MODELS = [ "gemini-3.1-pro-preview", "gemini-3.1-pro", - "gemini-3.5-flash-preview", - "gemini-3.5-flash", "gemini-3-flash", "gemini-3-pro", ] @@ -108,8 +106,6 @@ "minimax-m2.7", "gemini-3.1-pro", "gemini-3.1-pro-preview", - "gemini-3.5-flash", - "gemini-3.5-flash-preview", "gemini-3-flash", "gemini-3-pro", "deepseek-chat", diff --git a/tests/cross/test_resolve_model_config.py b/tests/cross/test_resolve_model_config.py index 0219f82afa..25e25253cd 100644 --- a/tests/cross/test_resolve_model_config.py +++ b/tests/cross/test_resolve_model_config.py @@ -667,5 +667,5 @@ def test_gemini_3_5_flash_config(): assert model["id"] == "gemini-3.5-flash" assert model["display_name"] == "Gemini 3.5 Flash" - assert model["llm_config"]["model"] == "litellm_proxy/gemini-3.5-flash-preview" + assert model["llm_config"]["model"] == "litellm_proxy/gemini-3.5-flash" assert model["llm_config"]["temperature"] == 0.0