Skip to content

Commit 855ba3c

Browse files
author
aibuddy
committed
feat(oai): parameter-recovery on 400 invalid temperature with one-time retry
Implements a targeted recovery in `CreateChatCompletion`: when the API returns 400 indicating an unsupported/invalid `temperature`, drop the field and retry once without consuming the normal retry budget. Adds focused integration test to prove behavior and checks off the checklist item. Keeps all tests green. Refs: #1
1 parent f3204b8 commit 855ba3c

4 files changed

Lines changed: 100 additions & 6 deletions

File tree

FEATURE_CHECKLIST.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@
283283
- [x] Add a tiny checker that diffs `docs/reference/cli-reference.md` against `./bin/agentcli -h` and a local test that runs it.
284284
* [x] LLM policy docs: update the policy page to state default `temperature=1.0`, show GPT-5 controls like `verbosity` (`low|medium|high`) and `reasoning_effort`, and note that some models restrict sampling knobs; **DoD:** page updated and reviewed locally (no external link checks required).
285285
* [x] Config precedence for temperature: implement resolution order `--temperature` > `LLM_TEMPERATURE` env > config file > default 1.0, and when `--top-p` is provided unset `temperature` (with a one-line warning to stderr); add local flag-parsing table tests covering overlaps and edge cases; **DoD:** `go test ./...` green and manual runs show correct precedence.
286-
* [ ] Parameter-recovery retry: when the API returns HTTP 400 mentioning invalid/unsupported `temperature`, remove `temperature` from the payload and retry **once** before normal exponential backoff; log the recovery with a structured field; **DoD:** local integration test using a mock server that first 400s on `temperature` and then succeeds without it.
286+
* [x] Parameter-recovery retry: when the API returns HTTP 400 mentioning invalid/unsupported `temperature`, remove `temperature` from the payload and retry **once** before normal exponential backoff; log the recovery with a structured field; **DoD:** local integration test using a mock server that first 400s on `temperature` and then succeeds without it.
287287
* [ ] Temperature-nudge guard: make the nudge logic a no-op when the selected model does not support temperature and clamp adjustments within `[0.1, 1.0]` otherwise; include local unit tests that simulate repetition/format-failure to trigger −0.1 and diversity to trigger +0.1 without exceeding bounds; **DoD:** `go test ./...` passes locally.
288288
* [ ] ADR addendum: append an addendum to the default-policy ADR documenting the change to default `temperature=1.0` for API parity and GPT-5 compatibility with a concise rationale and rollout note; **DoD:** ADR renders locally and is linked from the docs index.
289289
* [ ] One-knob sampling rule: enforce that if the user passes `--top-p` then `temperature` is omitted from the payload, and when `--top-p` is absent send `temperature` (default 1.0) with `top_p` unset; add local tests for precedence and serialization and a one-sentence rule in the docs; **DoD:** tests pass locally and docs updated.

internal/oai/client.go

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@ func (c *Client) CreateChatCompletion(ctx context.Context, req ChatCompletionsRe
8282
attempts = 1
8383
}
8484

85-
var lastErr error
85+
var lastErr error
86+
// Allow a single parameter-recovery retry without consuming the normal retry budget
87+
recoveryGranted := false
8688
// Generate a stable Idempotency-Key used across all attempts
8789
idemKey := generateIdempotencyKey()
8890
for attempt := 0; attempt < attempts; attempt++ {
@@ -170,8 +172,32 @@ func (c *Client) CreateChatCompletion(ctx context.Context, req ChatCompletionsRe
170172
}
171173
return zero, fmt.Errorf("read response body: %w", readErr)
172174
}
173-
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
174-
// Retry on 429 and 5xx; otherwise return immediately
175+
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
176+
// Parameter-recovery: if 400 mentions invalid/unsupported temperature and
177+
// the request included temperature, remove it and retry once immediately.
178+
if resp.StatusCode == http.StatusBadRequest {
179+
// Capture body string for inspection and logs
180+
bodyStr := string(respBody)
181+
if !recoveryGranted && includesTemperature(req) && mentionsUnsupportedTemperature(bodyStr) {
182+
// Log recovery attempt with a structured audit entry
183+
logHTTPAttempt(attempt+1, attempts, resp.StatusCode, 0, endpoint, "param_recovery: temperature")
184+
// Clear temperature and re-marshal request for a one-time recovery retry
185+
req.Temperature = nil
186+
nb, merr := json.Marshal(req)
187+
if merr == nil {
188+
body = nb
189+
// Grant exactly one extra attempt for recovery
190+
recoveryGranted = true
191+
attempts++
192+
// Emit timing audit for the failed attempt before retrying
193+
logHTTPTiming(attempt+1, endpoint, resp.StatusCode, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), "http_status", "param_recovery_temperature")
194+
// Perform immediate recovery retry without consuming a normal retry slot
195+
continue
196+
}
197+
// If marshal fails, fall through to normal error handling
198+
}
199+
}
200+
// Retry on 429 and 5xx; otherwise return immediately
175201
if attempt < attempts-1 && (resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500) {
176202
// Respect Retry-After when present; otherwise use exponential backoff
177203
if ra, ok := retryAfterDuration(resp.Header.Get("Retry-After"), time.Now()); ok {
@@ -188,7 +214,7 @@ func (c *Client) CreateChatCompletion(ctx context.Context, req ChatCompletionsRe
188214
continue
189215
}
190216
// Final non-retryable failure: log attempt (no backoff) and return
191-
logHTTPAttempt(attempt+1, attempts, resp.StatusCode, 0, endpoint, truncate(string(respBody), 2000))
217+
logHTTPAttempt(attempt+1, attempts, resp.StatusCode, 0, endpoint, truncate(string(respBody), 2000))
192218
logHTTPTiming(attempt+1, endpoint, resp.StatusCode, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), "http_status", "")
193219
return zero, fmt.Errorf("chat API %s: %d: %s", endpoint, resp.StatusCode, truncate(string(respBody), 2000))
194220
}

internal/oai/client_test.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,57 @@ func TestCreateChatCompletion_TemperaturePreservedWhenSupported(t *testing.T) {
172172
}
173173
}
174174

175+
// Parameter-recovery retry: when the server responds 400 mentioning invalid/unsupported
176+
// temperature, the client should remove temperature and retry once before any normal retries.
177+
func TestCreateChatCompletion_ParameterRecovery_InvalidTemperature(t *testing.T) {
178+
attempts := 0
179+
var firstReqHadTemp bool
180+
var secondReqHadTemp bool
181+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
182+
attempts++
183+
var req ChatCompletionsRequest
184+
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
185+
t.Fatalf("decode: %v", err)
186+
}
187+
if attempts == 1 {
188+
firstReqHadTemp = req.Temperature != nil
189+
// Simulate OpenAI-style 400 error indicating unsupported temperature
190+
w.WriteHeader(http.StatusBadRequest)
191+
_, _ = w.Write([]byte(`{"error":{"message":"parameter 'temperature' is unsupported for this model"}}`))
192+
return
193+
}
194+
secondReqHadTemp = req.Temperature != nil
195+
// On retry, succeed
196+
resp := ChatCompletionsResponse{Choices: []ChatCompletionsResponseChoice{{Message: Message{Role: RoleAssistant, Content: "ok"}}}}
197+
if err := json.NewEncoder(w).Encode(resp); err != nil {
198+
t.Fatalf("encode: %v", err)
199+
}
200+
}))
201+
defer srv.Close()
202+
203+
// No normal retries; parameter-recovery should still allow exactly one retry
204+
c := NewClientWithRetry(srv.URL, "", 2*time.Second, RetryPolicy{MaxRetries: 0})
205+
temp := 0.5
206+
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
207+
defer cancel()
208+
out, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "oss-gpt-20b", Messages: []Message{{Role: RoleUser, Content: "x"}}, Temperature: &temp})
209+
if err != nil {
210+
t.Fatalf("call: %v", err)
211+
}
212+
if out.Choices[0].Message.Content != "ok" {
213+
t.Fatalf("unexpected content: %+v", out)
214+
}
215+
if attempts != 2 {
216+
t.Fatalf("expected 2 attempts (1st 400, 2nd success), got %d", attempts)
217+
}
218+
if !firstReqHadTemp {
219+
t.Fatalf("expected temperature set on first request")
220+
}
221+
if secondReqHadTemp {
222+
t.Fatalf("expected temperature to be removed on retry after 400")
223+
}
224+
}
225+
175226
// https://github.com/hyperifyio/goagent/issues/216
176227
func TestCreateChatCompletion_RetryTimeoutThenSuccess(t *testing.T) {
177228
attempts := 0

internal/oai/types.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package oai
22

3-
import "encoding/json"
3+
import (
4+
"encoding/json"
5+
"strings"
6+
)
47

58
// Message roles
69
const (
@@ -55,6 +58,20 @@ type ChatCompletionsRequest struct {
5558
Temperature *float64 `json:"temperature,omitempty"`
5659
}
5760

61+
// includesTemperature reports whether the request currently has a temperature set.
62+
func includesTemperature(req ChatCompletionsRequest) bool { return req.Temperature != nil }
63+
64+
// mentionsUnsupportedTemperature detects common API error messages indicating
// that the temperature parameter is invalid or unsupported for the model.
// Matching is case-insensitive: the body must mention "temperature" together
// with either "unsupported" or "invalid". An empty body never matches.
func mentionsUnsupportedTemperature(body string) bool {
	lowered := strings.ToLower(body)
	// Guard clause: no temperature mention means no recovery, regardless of
	// which error keyword appears (also covers the empty-body case).
	if !strings.Contains(lowered, "temperature") {
		return false
	}
	return strings.Contains(lowered, "unsupported") || strings.Contains(lowered, "invalid")
}
74+
5875
// ChatCompletionsResponse represents the response for chat completions.
5976
type ChatCompletionsResponse struct {
6077
ID string `json:"id"`

0 commit comments

Comments
 (0)