OpenHands · juanmichelini · May 20, 2026 · May 20, 2026
diff --git a/.github/run-eval/ADDINGMODEL.md b/.github/run-eval/ADDINGMODEL.md
@@ -52,11 +52,20 @@ This file (`resolve_model_config.py`) defines models available for evaluation. M
    - `openhands-sdk/openhands/sdk/llm/utils/model_prompt_spec.py` - GPT models only (variant detection)
    - `openhands-sdk/openhands/sdk/llm/utils/verified_models.py` - Production-ready models
 
-   > ⚠️ **When editing `verified_models.py`**: If you add a model to `VERIFIED_OPENHANDS_MODELS`,
-   > you **must also** add it to its provider-specific list (e.g. `VERIFIED_ANTHROPIC_MODELS`,
-   > `VERIFIED_GEMINI_MODELS`, `VERIFIED_MOONSHOT_MODELS`, etc.).
-   > If no list exists for the provider yet, create one and add it to the `VERIFIED_MODELS` dict.
-   > This ensures the model appears under its actual provider in the UI, not just under "openhands".
+   > ⛔ **Do NOT add a model to `verified_models.py` unless explicitly asked to.**
+   > "Verified" means the model has been validated against the OpenHands integration
+   > test suite **and** an OpenHands maintainer has approved it for the production UI.
+   > A passing integration run is *necessary but not sufficient*. Add new models only
+   > to `MODELS` in `resolve_model_config.py` (and to `model_features.py` if
+   > applicable), and leave `verified_models.py` untouched until a maintainer
+   > explicitly requests the change in the PR.
+   >
+   > ⚠️ **When you are explicitly asked to edit `verified_models.py`**: If you add a
+   > model to `VERIFIED_OPENHANDS_MODELS`, you **must also** add it to its
+   > provider-specific list (e.g. `VERIFIED_ANTHROPIC_MODELS`, `VERIFIED_GEMINI_MODELS`,
+   > `VERIFIED_MOONSHOT_MODELS`, etc.). If no list exists for the provider yet, create
+   > one and add it to the `VERIFIED_MODELS` dict. This ensures the model appears under
+   > its actual provider in the UI, not just under "openhands".
 
 ## Step 1: Add to resolve_model_config.py
 

diff --git a/.github/run-eval/resolve_model_config.py b/.github/run-eval/resolve_model_config.py
@@ -153,6 +153,14 @@ def _sigterm_handler(signum: int, _frame: object) -> None:
             "temperature": 0.0,
         },
     },
+    "gemini-3.5-flash": {
+        "id": "gemini-3.5-flash",
+        "display_name": "Gemini 3.5 Flash",
+        "llm_config": {
+            "model": "litellm_proxy/gemini-3.5-flash",
+            "temperature": 0.0,
+        },
+    },
     "gpt-5.2": {
         "id": "gpt-5.2",
         "display_name": "GPT-5.2",

diff --git a/tests/cross/test_resolve_model_config.py b/tests/cross/test_resolve_model_config.py
@@ -659,3 +659,13 @@ def test_deepseek_v4_flash_config():
     assert model["id"] == "deepseek-v4-flash"
     assert model["display_name"] == "DeepSeek V4 Flash"
     assert model["llm_config"]["model"] == "litellm_proxy/deepseek/deepseek-v4-flash"
+
+
+def test_gemini_3_5_flash_config():
+    """Check the gemini-3.5-flash entry: id, display name, LLM model, temperature."""
+    model = MODELS["gemini-3.5-flash"]
+
+    assert model["id"] == "gemini-3.5-flash"
+    assert model["display_name"] == "Gemini 3.5 Flash"
+    assert model["llm_config"]["model"] == "litellm_proxy/gemini-3.5-flash"
+    assert model["llm_config"]["temperature"] == 0.0