From 0d5b24bd4d58c7747712bf6fa7011d517c32f8ec Mon Sep 17 00:00:00 2001 From: Carlos Herrero Date: Fri, 15 May 2026 09:57:33 +0200 Subject: [PATCH] test(onboard): retry callback request to absorb listener-startup race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TestBuild_OAuthE2E (and the two Microsoft variants) flakes intermittently on CI with `Get http://127.0.0.1:NNNN/callback...: EOF`. The binary binds the local listener and prints the auth URL as soon as net.Listen returns, but the goroutine that calls Accept may not be scheduled yet on a busy runner — the test then fires the callback GET into a half-ready server and trips EOF. simulateCallback now retries on transport errors for up to ~1s (10 attempts at 100ms backoff) and respects the test context. The callback handler is single-use (it triggers server.Shutdown after a successful response), so retries only execute when the first attempt never reached the handler — they cannot mask a real callback failure. Verified by running the three E2E tests with -race -count=5 locally (15 runs, all green). --- cmd/chaperone-onboard/main_test.go | 35 +++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/cmd/chaperone-onboard/main_test.go b/cmd/chaperone-onboard/main_test.go index 6876874..d11782e 100644 --- a/cmd/chaperone-onboard/main_test.go +++ b/cmd/chaperone-onboard/main_test.go @@ -416,19 +416,38 @@ func runE2EConsent(t *testing.T, cmd *exec.Cmd) (string, *strings.Builder) { // simulateCallback sends a GET request to the callback server with the given // authorization code and state. +// +// The binary prints the auth URL to stderr as soon as net.Listen returns, but +// the goroutine that calls Accept may not be scheduled yet on a busy runner. +// Retry briefly on transport errors so this races cleanly with server startup. +// The callback handler is single-use (triggers server.Shutdown on success), so +// retries only fire when the first attempt never reached the handler. func simulateCallback(ctx context.Context, t *testing.T, redirectURI, state, code string) { t.Helper() callbackURL := fmt.Sprintf("%s?code=%s&state=%s", redirectURI, url.QueryEscape(code), url.QueryEscape(state)) - req, err := http.NewRequestWithContext(ctx, http.MethodGet, callbackURL, nil) - if err != nil { - t.Fatalf("failed to create callback request: %v", err) - } - resp, err := http.DefaultClient.Do(req) - if err != nil { - t.Fatalf("callback request failed: %v", err) + + const maxAttempts = 10 + const backoff = 100 * time.Millisecond + var lastErr error + for attempt := 1; attempt <= maxAttempts; attempt++ { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, callbackURL, nil) + if err != nil { + t.Fatalf("failed to create callback request: %v", err) + } + resp, err := http.DefaultClient.Do(req) + if err == nil { + resp.Body.Close() + return + } + lastErr = err + select { + case <-ctx.Done(): + t.Fatalf("callback request failed: %v (ctx: %v)", lastErr, ctx.Err()) + case <-time.After(backoff): + } } - resp.Body.Close() + t.Fatalf("callback request failed after %d attempts: %v", maxAttempts, lastErr) } func TestRun_HelpFlag(t *testing.T) {