From 7c074bf80a48d96722b5e14c2ac94407a63f239c Mon Sep 17 00:00:00 2001
From: Jason Barnett <jason.barnett@altana.ai>
Date: Tue, 28 Apr 2026 14:46:50 +0000
Subject: [PATCH] Skip retries for pytest collection errors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a test file fails to import, pytest fires pytest_collectreport
instead of the pytest_runtest_* hooks. The test-collector-python plugin
(PR #108) now tags these entries with test.pytest_collection_error=true
in the JSON report.

Parse the tags field from the JSON report and use it to mark collection
errors. These are excluded from retry candidates since retrying a broken
import is pointless — the underlying issue is a syntax error or missing
dependency, not a flaky test.

Collection errors still count as failures in the run status and appear
in the report under a dedicated "Collection Errors" section.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 internal/command/run.go                       |  9 +++
 internal/runner/pytest.go                     | 10 ++-
 internal/runner/pytest_test.go                | 32 +++++++++
 internal/runner/run_result.go                 | 58 ++++++++++++++--
 internal/runner/run_result_test.go            | 69 +++++++++++++++++++
 internal/runner/test_result.go                |  7 +-
 .../pytest/result-collection-error.json       |  4 ++
 .../test-engine-result-with-tags.json         | 19 +++++
 8 files changed, 197 insertions(+), 11 deletions(-)
 create mode 100644 internal/runner/testdata/pytest/result-collection-error.json
 create mode 100644 internal/runner/testdata/test-engine-result-with-tags.json

diff --git a/internal/command/run.go b/internal/command/run.go
index b4eb3d89..41d242e6 100644
--- a/internal/command/run.go
+++ b/internal/command/run.go
@@ -171,6 +171,15 @@ func printReport(runResult runner.RunResult, testsSkippedByTestEngine []plan.Tes
 			}
 		}
 
+		collectionErrors := runResult.CollectionErrors()
+		if len(collectionErrors) > 0 {
+			fmt.Println("")
+			fmt.Println("+++ Collection Errors (will not be retried):")
+			for _, ce := range collectionErrors {
+				fmt.Printf("- %s %s\n", ce.Scope, ce.Name)
+			}
+		}
+
 		failedTests := runResult.FailedTests()
 		if len(failedTests) > 0 {
 			fmt.Println("")
diff --git a/internal/runner/pytest.go b/internal/runner/pytest.go
index 2241c6bf..55f06859 100644
--- a/internal/runner/pytest.go
+++ b/internal/runner/pytest.go
@@ -96,7 +96,7 @@ func (p Pytest) Run(result *RunResult, testCases []plan.TestCase, retry bool) er
 	}
 
 	for _, test := range tests {
-		result.RecordTestResult(plan.TestCase{
+		tc := plan.TestCase{
 			Identifier: test.Id,
 			Format:     plan.TestCaseFormatExample,
 			Scope:      test.Scope,
@@ -104,7 +104,13 @@ func (p Pytest) Run(result *RunResult, testCases []plan.TestCase, retry bool) er
 			// pytest can execute individual test using node id, which is a filename, classname (if any), and function, separated by `::`.
 			// Ref: https://docs.pytest.org/en/6.2.x/usage.html#nodeids
 			Path: fmt.Sprintf("%s::%s", test.Scope, test.Name),
-		}, test.Result)
+		}
+
+		if test.Tags["test.pytest_collection_error"] == "true" {
+			result.RecordCollectionError(tc)
+		} else {
+			result.RecordTestResult(tc, test.Result)
+		}
 	}
 
 	// Return any command error after processing the report
diff --git a/internal/runner/pytest_test.go b/internal/runner/pytest_test.go
index 32bf0874..61d50815 100644
--- a/internal/runner/pytest_test.go
+++ b/internal/runner/pytest_test.go
@@ -91,6 +91,38 @@ func TestPytestRun_TestFailed(t *testing.T) {
 	}
 }
 
+func TestPytestRun_CollectionErrorNotRetried(t *testing.T) {
+	changeCwd(t, "./testdata/pytest")
+
+	pytest := NewPytest(RunnerConfig{
+		TestCommand: "pytest",
+		ResultPath:  "result-collection-error.json",
+	})
+	testCases := []plan.TestCase{
+		{Path: "failed_test.py"},
+	}
+	result := NewRunResult([]plan.TestCase{})
+	err := pytest.Run(result, testCases, false)
+
+	exitError := new(exec.ExitError)
+	assert.ErrorAs(t, err, &exitError)
+
+	if result.Status() != RunStatusFailed {
+		t.Errorf("RunResult.Status = %v, want %v", result.Status(), RunStatusFailed)
+	}
+
+	// FailedTests should only contain the real test failure, not the collection error
+	failedTests := result.FailedTests()
+	if len(failedTests) != 1 {
+		t.Errorf("len(FailedTests()) = %d, want 1", len(failedTests))
+	}
+	if len(failedTests) > 0 {
+		if failedTests[0].Name != "test_failed" {
+			t.Errorf("FailedTests()[0].Name = %q, want %q", failedTests[0].Name, "test_failed")
+		}
+	}
+}
+
 func TestPytestRun_TestFailedWithoutResultFile(t *testing.T) {
 	changeCwd(t, "./testdata/pytest")
 
diff --git a/internal/runner/run_result.go b/internal/runner/run_result.go
index 47bd09da..70a32a9a 100644
--- a/internal/runner/run_result.go
+++ b/internal/runner/run_result.go
@@ -76,12 +76,26 @@ func (r *RunResult) RecordTestResult(testCase plan.TestCase, status TestStatus)
 	}
 }
 
+// RecordCollectionError records a test collection error (e.g. import failure).
+// Collection errors are recorded as failed but excluded from retries.
+func (r *RunResult) RecordCollectionError(testCase plan.TestCase) {
+	test := r.getTest(testCase)
+	test.Status = TestStatusFailed
+	test.CollectionError = true
+	test.ExecutionCount++
+	if r.mutedTestLookup[mutedTestIdentifier(testCase)] {
+		test.Muted = true
+	}
+}
+
 // FailedTests returns a list of test cases that failed.
+// Collection errors are excluded because retrying them is pointless
+// (the underlying issue is a broken import or syntax error, not a flaky test).
 func (r *RunResult) FailedTests() []plan.TestCase {
 	var failedTests []plan.TestCase
 
 	for _, test := range r.tests {
-		if test.Status == TestStatusFailed && !test.Muted {
+		if test.Status == TestStatusFailed && !test.Muted && !test.CollectionError {
 			failedTests = append(failedTests, test.TestCase)
 		}
 	}
@@ -89,6 +103,17 @@ func (r *RunResult) FailedTests() []plan.TestCase {
 	return failedTests
 }
 
+// CollectionErrors returns test cases that failed due to collection errors (e.g. import failures).
+func (r *RunResult) CollectionErrors() []plan.TestCase {
+	var errors []plan.TestCase
+	for _, test := range r.tests {
+		if test.CollectionError && !test.Muted {
+			errors = append(errors, test.TestCase)
+		}
+	}
+	return errors
+}
+
 func (r *RunResult) MutedTests() []TestResult {
 	var mutedTests []TestResult
 	for _, test := range r.tests {
@@ -115,7 +140,7 @@ func (r *RunResult) FailedMutedTests() []plan.TestCase {
 	var failedTests []plan.TestCase
 
 	for _, test := range r.tests {
-		if test.Status == TestStatusFailed && test.Muted {
+		if test.Status == TestStatusFailed && test.Muted && !test.CollectionError {
 			failedTests = append(failedTests, test.TestCase)
 		}
 	}
@@ -132,6 +157,26 @@ func (r *RunResult) passedTestsCount() int {
 	return count
 }
 
+func (r *RunResult) collectionErrorCount() int {
+	count := 0
+	for _, test := range r.tests {
+		if test.CollectionError && !test.Muted {
+			count++
+		}
+	}
+	return count
+}
+
+func (r *RunResult) mutedCollectionErrorCount() int {
+	count := 0
+	for _, test := range r.tests {
+		if test.CollectionError && test.Muted {
+			count++
+		}
+	}
+	return count
+}
+
 func (r *RunResult) skippedTestsCount() int {
 	count := 0
 	for _, test := range r.tests {
@@ -163,7 +208,7 @@ func (r *RunResult) OnlyMutedFailures() bool {
 
 // Status returns the overall status of the test run.
 // If there is an error, it returns RunStatusError.
-// If there are failed tests, it returns RunStatusFailed.
+// If there are any non-muted failed tests (including collection errors), it returns RunStatusFailed.
 // Otherwise, it returns RunStatusPassed.
 func (r *RunResult) Status() RunStatus {
 	if r.error != nil {
@@ -174,11 +219,11 @@ func (r *RunResult) Status() RunStatus {
 		return RunStatusUnknown
 	}
 
-	if len(r.FailedTests()) > 0 {
+	if len(r.FailedTests()) > 0 || r.collectionErrorCount() > 0 {
 		return RunStatusFailed
 	}
 
-	if r.passedTestsCount()+r.skippedTestsCount()+len(r.FailedMutedTests()) == len(r.tests) {
+	if r.passedTestsCount()+r.skippedTestsCount()+len(r.FailedMutedTests())+r.mutedCollectionErrorCount() == len(r.tests) {
 		return RunStatusPassed
 	}
 
@@ -241,8 +286,9 @@ type TestEngineTest struct {
 	Name     string
 	Scope    string
 	Location string
-	FileName string `json:"file_name,omitempty"`
+	FileName string            `json:"file_name,omitempty"`
 	Result   TestStatus
+	Tags     map[string]string `json:"tags,omitempty"`
 }
 
 func parseTestEngineTestResult(path string) ([]TestEngineTest, error) {
diff --git a/internal/runner/run_result_test.go b/internal/runner/run_result_test.go
index 30269504..7c6f86a4 100644
--- a/internal/runner/run_result_test.go
+++ b/internal/runner/run_result_test.go
@@ -309,6 +309,56 @@ func TestOnlyMutedFailures(t *testing.T) {
 	}
 }
 
+func TestCollectionErrors_ExcludedFromFailedTests(t *testing.T) {
+	r := NewRunResult([]plan.TestCase{})
+
+	realFailure := plan.TestCase{Scope: "tests/math.py", Name: "test_add"}
+	collectionErr := plan.TestCase{Scope: "tests/broken.py", Name: "tests/broken.py"}
+
+	r.RecordTestResult(realFailure, TestStatusFailed)
+	r.RecordCollectionError(collectionErr)
+
+	if r.Status() != RunStatusFailed {
+		t.Errorf("Status() = %s, want %s", r.Status(), RunStatusFailed)
+	}
+
+	failedTests := r.FailedTests()
+	if len(failedTests) != 1 {
+		t.Fatalf("len(FailedTests()) = %d, want 1", len(failedTests))
+	}
+	if failedTests[0].Name != "test_add" {
+		t.Errorf("FailedTests()[0].Name = %q, want %q", failedTests[0].Name, "test_add")
+	}
+}
+
+func TestCollectionErrors_ExcludedFromFailedMutedTests(t *testing.T) {
+	collectionErr := plan.TestCase{Scope: "tests/broken.py", Name: "tests/broken.py"}
+
+	r := NewRunResult([]plan.TestCase{collectionErr})
+	r.RecordCollectionError(collectionErr)
+
+	failedMuted := r.FailedMutedTests()
+	if len(failedMuted) != 0 {
+		t.Errorf("len(FailedMutedTests()) = %d, want 0", len(failedMuted))
+	}
+}
+
+func TestCollectionErrors_OnlyCollectionErrorsStillFailed(t *testing.T) {
+	r := NewRunResult([]plan.TestCase{})
+
+	collectionErr := plan.TestCase{Scope: "tests/broken.py", Name: "tests/broken.py"}
+	r.RecordCollectionError(collectionErr)
+
+	if r.Status() != RunStatusFailed {
+		t.Errorf("Status() = %s, want %s", r.Status(), RunStatusFailed)
+	}
+
+	// No retryable failures
+	if len(r.FailedTests()) != 0 {
+		t.Errorf("len(FailedTests()) = %d, want 0", len(r.FailedTests()))
+	}
+}
+
 func TestParseTestEngineTestResult(t *testing.T) {
 	results, err := parseTestEngineTestResult("testdata/test-engine-result.json")
 	if err != nil {
@@ -318,3 +368,22 @@ func TestParseTestEngineTestResult(t *testing.T) {
 		t.Errorf("len(results) = %d, want 2", len(results))
 	}
 }
+
+func TestParseTestEngineTestResult_WithTags(t *testing.T) {
+	results, err := parseTestEngineTestResult("testdata/test-engine-result-with-tags.json")
+	if err != nil {
+		t.Fatalf("parseTestEngineTestResult() error = %v", err)
+	}
+	if len(results) != 2 {
+		t.Fatalf("len(results) = %d, want 2", len(results))
+	}
+
+	if results[0].Tags != nil {
+		t.Errorf("results[0].Tags = %v, want nil", results[0].Tags)
+	}
+
+	wantTag := "true"
+	if got := results[1].Tags["test.pytest_collection_error"]; got != wantTag {
+		t.Errorf("results[1].Tags[\"test.pytest_collection_error\"] = %q, want %q", got, wantTag)
+	}
+}
diff --git a/internal/runner/test_result.go b/internal/runner/test_result.go
index 4b733e44..0e70a93b 100644
--- a/internal/runner/test_result.go
+++ b/internal/runner/test_result.go
@@ -14,9 +14,10 @@ const (
 // TestResult is a struct to keep track the result of an individual test case.
 type TestResult struct {
 	plan.TestCase
-	Status         TestStatus
-	ExecutionCount int
-	Muted          bool
+	Status          TestStatus
+	ExecutionCount  int
+	Muted           bool
+	CollectionError bool
 }
 
 // testIdentifier returns a unique identifier for a test case based on its scope, name and path.
diff --git a/internal/runner/testdata/pytest/result-collection-error.json b/internal/runner/testdata/pytest/result-collection-error.json
new file mode 100644
index 00000000..8f1c9a7a
--- /dev/null
+++ b/internal/runner/testdata/pytest/result-collection-error.json
@@ -0,0 +1,4 @@
+[
+  {"id": "a1be7e52-0dba-4018-83ce-a1598ca68807", "scope": "tests/failed_test.py", "name": "test_failed", "location": "tests/failed_test.py:0", "file_name": "tests/failed_test.py", "result": "failed", "failure_reason": "def test_failed():\n>       assert 3 == 5\nE       assert 3 == 5\n\ntests/failed_test.py:2: AssertionError"},
+  {"id": "c0ffee00-dead-beef-cafe-123456789abc", "scope": "tests/broken_import.py", "name": "tests/broken_import.py", "location": "tests/broken_import.py", "file_name": "tests/broken_import.py", "result": "failed", "failure_reason": "ModuleNotFoundError: No module named 'nonexistent'", "tags": {"test.pytest_collection_error": "true"}}
+]
diff --git a/internal/runner/testdata/test-engine-result-with-tags.json b/internal/runner/testdata/test-engine-result-with-tags.json
new file mode 100644
index 00000000..b5945979
--- /dev/null
+++ b/internal/runner/testdata/test-engine-result-with-tags.json
@@ -0,0 +1,19 @@
+[
+  {
+    "id": "db2c4ffa-efee-4b6a-b293-a4c4168aa353",
+    "scope": "tests/test_sample.py",
+    "name": "test_happy",
+    "location": "tests/test_sample.py:0",
+    "file_name": "tests/test_sample.py",
+    "result": "passed"
+  },
+  {
+    "id": "c0ffee00-dead-beef-cafe-123456789abc",
+    "scope": "tests/broken_import.py",
+    "name": "tests/broken_import.py",
+    "location": "tests/broken_import.py",
+    "file_name": "tests/broken_import.py",
+    "result": "failed",
+    "tags": {"test.pytest_collection_error": "true"}
+  }
+]