Skip to content

Commit 855ba3c

Browse files
author
aibuddy
committed
feat(oai): parameter-recovery on 400 invalid temperature with one-time retry
Implements a targeted recovery in `CreateChatCompletion`: when the API returns 400 indicating an unsupported/invalid `temperature`, drop the field and retry once without consuming the normal retry budget. Adds focused integration test to prove behavior and checks off the checklist item. Keeps all tests green. Refs: #1
1 parent f3204b8 commit 855ba3c

4 files changed

Lines changed: 100 additions & 6 deletions

File tree

FEATURE_CHECKLIST.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@
283283
- [x] Add a tiny checker that diffs `docs/reference/cli-reference.md` against `./bin/agentcli -h` and a local test that runs it.
284284
* [x] LLM policy docs: update the policy page to state default `temperature=1.0`, show GPT-5 controls like `verbosity` (`low|medium|high`) and `reasoning_effort`, and note that some models restrict sampling knobs; **DoD:** page updated and reviewed locally (no external link checks required).
285285
* [x] Config precedence for temperature: implement resolution order `--temperature` > `LLM_TEMPERATURE` env > config file > default 1.0, and when `--top-p` is provided unset `temperature` (with a one-line warning to stderr); add local flag-parsing table tests covering overlaps and edge cases; **DoD:** `go test ./...` green and manual runs show correct precedence.
286-
* [ ] Parameter-recovery retry: when the API returns HTTP 400 mentioning invalid/unsupported `temperature`, remove `temperature` from the payload and retry **once** before normal exponential backoff; log the recovery with a structured field; **DoD:** local integration test using a mock server that first 400s on `temperature` and then succeeds without it.
286+
* [x] Parameter-recovery retry: when the API returns HTTP 400 mentioning invalid/unsupported `temperature`, remove `temperature` from the payload and retry **once** before normal exponential backoff; log the recovery with a structured field; **DoD:** local integration test using a mock server that first 400s on `temperature` and then succeeds without it.
287287
* [ ] Temperature-nudge guard: make the nudge logic a no-op when the selected model does not support temperature and clamp adjustments within `[0.1, 1.0]` otherwise; include local unit tests that simulate repetition/format-failure to trigger −0.1 and diversity to trigger +0.1 without exceeding bounds; **DoD:** `go test ./...` passes locally.
288288
* [ ] ADR addendum: append an addendum to the default-policy ADR documenting the change to default `temperature=1.0` for API parity and GPT-5 compatibility with a concise rationale and rollout note; **DoD:** ADR renders locally and is linked from the docs index.
289289
* [ ] One-knob sampling rule: enforce that if the user passes `--top-p` then `temperature` is omitted from the payload, and when `--top-p` is absent send `temperature` (default 1.0) with `top_p` unset; add local tests for precedence and serialization and a one-sentence rule in the docs; **DoD:** tests pass locally and docs updated.

internal/oai/client.go

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,9 @@ func (c *Client) CreateChatCompletion(ctx context.Context, req ChatCompletionsRe
8282
attempts = 1
8383
}
8484

85-
var lastErr error
85+
var lastErr error
86+
// Allow a single parameter-recovery retry without consuming the normal retry budget
87+
recoveryGranted := false
8688
// Generate a stable Idempotency-Key used across all attempts
8789
idemKey := generateIdempotencyKey()
8890
for attempt := 0; attempt < attempts; attempt++ {
@@ -170,8 +172,32 @@ func (c *Client) CreateChatCompletion(ctx context.Context, req ChatCompletionsRe
170172
}
171173
return zero, fmt.Errorf("read response body: %w", readErr)
172174
}
173-
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
174-
// Retry on 429 and 5xx; otherwise return immediately
175+
if resp.StatusCode < 200 || resp.StatusCode >= 300 {
176+
// Parameter-recovery: if 400 mentions invalid/unsupported temperature and
177+
// the request included temperature, remove it and retry once immediately.
178+
if resp.StatusCode == http.StatusBadRequest {
179+
// Capture body string for inspection and logs
180+
bodyStr := string(respBody)
181+
if !recoveryGranted && includesTemperature(req) && mentionsUnsupportedTemperature(bodyStr) {
182+
// Log recovery attempt with a structured audit entry
183+
logHTTPAttempt(attempt+1, attempts, resp.StatusCode, 0, endpoint, "param_recovery: temperature")
184+
// Clear temperature and re-marshal request for a one-time recovery retry
185+
req.Temperature = nil
186+
nb, merr := json.Marshal(req)
187+
if merr == nil {
188+
body = nb
189+
// Grant exactly one extra attempt for recovery
190+
recoveryGranted = true
191+
attempts++
192+
// Emit timing audit for the failed attempt before retrying
193+
logHTTPTiming(attempt+1, endpoint, resp.StatusCode, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), "http_status", "param_recovery_temperature")
194+
// Perform immediate recovery retry without consuming a normal retry slot
195+
continue
196+
}
197+
// If marshal fails, fall through to normal error handling
198+
}
199+
}
200+
// Retry on 429 and 5xx; otherwise return immediately
175201
if attempt < attempts-1 && (resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode >= 500) {
176202
// Respect Retry-After when present; otherwise use exponential backoff
177203
if ra, ok := retryAfterDuration(resp.Header.Get("Retry-After"), time.Now()); ok {
@@ -188,7 +214,7 @@ func (c *Client) CreateChatCompletion(ctx context.Context, req ChatCompletionsRe
188214
continue
189215
}
190216
// Final non-retryable failure: log attempt (no backoff) and return
191-
logHTTPAttempt(attempt+1, attempts, resp.StatusCode, 0, endpoint, truncate(string(respBody), 2000))
217+
logHTTPAttempt(attempt+1, attempts, resp.StatusCode, 0, endpoint, truncate(string(respBody), 2000))
192218
logHTTPTiming(attempt+1, endpoint, resp.StatusCode, attemptStart, dnsDur, connDur, 0, wroteAt, firstByteAt, time.Now(), "http_status", "")
193219
return zero, fmt.Errorf("chat API %s: %d: %s", endpoint, resp.StatusCode, truncate(string(respBody), 2000))
194220
}

internal/oai/client_test.go

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,57 @@ func TestCreateChatCompletion_TemperaturePreservedWhenSupported(t *testing.T) {
172172
}
173173
}
174174

175+
// Parameter-recovery retry: when the server responds 400 mentioning invalid/unsupported
176+
// temperature, the client should remove temperature and retry once before any normal retries.
177+
func TestCreateChatCompletion_ParameterRecovery_InvalidTemperature(t *testing.T) {
178+
attempts := 0
179+
var firstReqHadTemp bool
180+
var secondReqHadTemp bool
181+
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
182+
attempts++
183+
var req ChatCompletionsRequest
184+
if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
185+
t.Fatalf("decode: %v", err)
186+
}
187+
if attempts == 1 {
188+
firstReqHadTemp = req.Temperature != nil
189+
// Simulate OpenAI-style 400 error indicating unsupported temperature
190+
w.WriteHeader(http.StatusBadRequest)
191+
_, _ = w.Write([]byte(`{"error":{"message":"parameter 'temperature' is unsupported for this model"}}`))
192+
return
193+
}
194+
secondReqHadTemp = req.Temperature != nil
195+
// On retry, succeed
196+
resp := ChatCompletionsResponse{Choices: []ChatCompletionsResponseChoice{{Message: Message{Role: RoleAssistant, Content: "ok"}}}}
197+
if err := json.NewEncoder(w).Encode(resp); err != nil {
198+
t.Fatalf("encode: %v", err)
199+
}
200+
}))
201+
defer srv.Close()
202+
203+
// No normal retries; parameter-recovery should still allow exactly one retry
204+
c := NewClientWithRetry(srv.URL, "", 2*time.Second, RetryPolicy{MaxRetries: 0})
205+
temp := 0.5
206+
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
207+
defer cancel()
208+
out, err := c.CreateChatCompletion(ctx, ChatCompletionsRequest{Model: "oss-gpt-20b", Messages: []Message{{Role: RoleUser, Content: "x"}}, Temperature: &temp})
209+
if err != nil {
210+
t.Fatalf("call: %v", err)
211+
}
212+
if out.Choices[0].Message.Content != "ok" {
213+
t.Fatalf("unexpected content: %+v", out)
214+
}
215+
if attempts != 2 {
216+
t.Fatalf("expected 2 attempts (1st 400, 2nd success), got %d", attempts)
217+
}
218+
if !firstReqHadTemp {
219+
t.Fatalf("expected temperature set on first request")
220+
}
221+
if secondReqHadTemp {
222+
t.Fatalf("expected temperature to be removed on retry after 400")
223+
}
224+
}
225+
175226
// https://github.com/hyperifyio/goagent/issues/216
176227
func TestCreateChatCompletion_RetryTimeoutThenSuccess(t *testing.T) {
177228
attempts := 0

internal/oai/types.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
package oai
22

3-
import "encoding/json"
3+
import (
4+
"encoding/json"
5+
"strings"
6+
)
47

58
// Message roles
69
const (
@@ -55,6 +58,20 @@ type ChatCompletionsRequest struct {
5558
Temperature *float64 `json:"temperature,omitempty"`
5659
}
5760

61+
// includesTemperature reports whether the request currently has a temperature set.
62+
func includesTemperature(req ChatCompletionsRequest) bool { return req.Temperature != nil }
63+
64+
// mentionsUnsupportedTemperature detects common API error messages indicating
// that the temperature parameter is invalid or unsupported for the model.
// Matching is case-insensitive: the body must mention "temperature" together
// with either "unsupported" or "invalid". An empty body never matches.
func mentionsUnsupportedTemperature(body string) bool {
	lowered := strings.ToLower(body)
	// Guard clause: no temperature mention means no recovery, regardless of
	// which error keyword appears (also covers the empty-body case).
	if !strings.Contains(lowered, "temperature") {
		return false
	}
	return strings.Contains(lowered, "unsupported") || strings.Contains(lowered, "invalid")
}
74+
5875
// ChatCompletionsResponse represents the response for chat completions.
5976
type ChatCompletionsResponse struct {
6077
ID string `json:"id"`

0 commit comments

Comments
 (0)