diff --git a/README.md b/README.md index 1417902..a0e6196 100644 --- a/README.md +++ b/README.md @@ -11,11 +11,12 @@ Four detectors run against each commit, each producing findings at a confidence **High confidence** -- strong signals that an AI tool authored or co-authored the commit: - Known AI bot committer emails (Claude, Copilot, Cursor, Codex, Gemini Code Assist, Amazon Q, Devin, Cline, Continue.dev, Cody, JetBrains AI, CodeRabbit). Also matches on the numeric prefix of GitHub noreply emails, so bot username renames don't break detection. - `Co-Authored-By` trailers with known AI tool emails (Claude Code, Cursor, Aider). +- AI session ID trailers (such as `Replit-Commit-Session-Id`) combined with other known commit trailers (such as `Replit-Commit-Author: Agent`), indicating that the commit was generated as part of an AI conversation or workflow. A session ID raises a finding's confidence by one level, so it yields high confidence only when the accompanying trailer is medium confidence on its own. **Medium confidence** -- patterns in the commit message itself: - `aider:` prefix (Aider's default commit format). - `Generated with Claude Code` footer. -- Known commit trailers in formats unique to specific tools (such as EntireIO and Replit) that can contain values indicative of AI use +- Known commit trailers in formats unique to specific tools (such as EntireIO and Replit Agent, or Replit Assistant when a session ID trailer is also present) that can contain values indicative of AI use. **Low confidence** -- mentions of AI tool names in text: diff --git a/detection/detection.go b/detection/detection.go index 04cacfd..f40c92f 100644 --- a/detection/detection.go +++ b/detection/detection.go @@ -22,6 +22,10 @@ func (c Confidence) String() string { } } +func (c *Confidence) Increment() { + *c = min(*c+1, ConfidenceHigh) +} + // Finding represents a single detection of AI involvement. 
type Finding struct { Detector string `json:"detector"` diff --git a/detection/message/message.go b/detection/message/message.go index d00f1c5..bc6897a 100644 --- a/detection/message/message.go +++ b/detection/message/message.go @@ -2,29 +2,30 @@ package message import ( "fmt" + "regexp" "strings" "github.com/chaoss/ai-detection-action/detection" ) var commitMessagePatterns = []struct { - check func(string) bool + check func(string) (detection.Confidence, bool) name string }{ { - check: func(msg string) bool { - return strings.HasPrefix(strings.ToLower(msg), "aider:") + check: func(msg string) (detection.Confidence, bool) { + return detection.ConfidenceMedium, strings.HasPrefix(strings.ToLower(msg), "aider:") }, name: "Aider", }, { - check: func(msg string) bool { - return strings.Contains(msg, "Generated with Claude Code") + check: func(msg string) (detection.Confidence, bool) { + return detection.ConfidenceMedium, strings.Contains(msg, "Generated with Claude Code") }, name: "Claude Code", }, { - check: func(msg string) bool { + check: func(msg string) (detection.Confidence, bool) { trailers := []string{ "Entire-Metadata", "Entire-Metadata-Task", @@ -37,13 +38,43 @@ var commitMessagePatterns = []struct { } for _, trailer := range trailers { if strings.Contains(msg, fmt.Sprintf("\n%s:", trailer)) { - return true + return detection.ConfidenceMedium, true } } - return false + return detection.ConfidenceMedium, false }, name: "EntireIO", }, + { + check: func(msg string) (detection.Confidence, bool) { + trailerRegex := regexp.MustCompile(`(?m)^Replit-Commit-Author:\s*(Agent|Assistant)(?:\r?\nReplit-Commit-Session-Id:\s*([a-fA-F0-9-]+))?(?:\r?\n|$)`) + + matchResult := trailerRegex.FindStringSubmatch(msg) + if len(matchResult) == 0 { + // replit not detected + return detection.ConfidenceMedium, false + } + + var confidence detection.Confidence + switch matchResult[1] { + case "Agent": + confidence = detection.ConfidenceMedium + case "Assistant": + confidence = 
detection.ConfidenceLow + default: + // unknown replit product, we cannot confirm ai use + return detection.ConfidenceLow, false + } + + // if commit session id also present, increase confidence + if matchResult[2] != "" { + confidence.Increment() + } + + return confidence, true + }, + name: "Replit", + }, } type Detector struct{} @@ -57,11 +88,11 @@ func (d *Detector) Detect(input detection.Input) []detection.Finding { var findings []detection.Finding for _, p := range commitMessagePatterns { - if p.check(input.CommitMessage) { + if confidence, isDetected := p.check(input.CommitMessage); isDetected { findings = append(findings, detection.Finding{ Detector: d.Name(), Tool: p.name, - Confidence: detection.ConfidenceMedium, + Confidence: confidence, Detail: fmt.Sprintf("commit message matches %s pattern", p.name), }) } diff --git a/detection/message/message_test.go b/detection/message/message_test.go index 2352a3b..3c79655 100644 --- a/detection/message/message_test.go +++ b/detection/message/message_test.go @@ -9,69 +9,142 @@ import ( func TestDetect(t *testing.T) { d := &Detector{} tests := []struct { - name string - message string - wantTools []string + name string + message string + wantTools []string + wantConfidence []detection.Confidence }{ { - name: "aider prefix", - message: "aider: fix the login bug", - wantTools: []string{"Aider"}, + name: "aider prefix", + message: "aider: fix the login bug", + wantTools: []string{"Aider"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "aider prefix uppercase", - message: "Aider: refactor auth module", - wantTools: []string{"Aider"}, + name: "aider prefix uppercase", + message: "Aider: refactor auth module", + wantTools: []string{"Aider"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "Claude Code footer", - message: "Add user validation\n\nGenerated with Claude Code", - wantTools: []string{"Claude Code"}, + name: "Claude Code footer", + message: 
"Add user validation\n\nGenerated with Claude Code", + wantTools: []string{"Claude Code"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "Claude Code footer with link", - message: "Add validation\n\nGenerated with Claude Code\nhttps://claude.ai", - wantTools: []string{"Claude Code"}, + name: "Claude Code footer with link", + message: "Add validation\n\nGenerated with Claude Code\nhttps://claude.ai", + wantTools: []string{"Claude Code"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "EntireIO trailer present in commit", - message: "this is some commit message\n\nEntire-Checkpoint: ab123cdefg12", - wantTools: []string{"EntireIO"}, + name: "EntireIO trailer present in commit", + message: "this is some commit message\n\nEntire-Checkpoint: ab123cdefg12", + wantTools: []string{"EntireIO"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "Another EntireIO trailer present in commit", - message: "this is some commit message\n\nEntire-Metadata: ab123cdefg12", - wantTools: []string{"EntireIO"}, + name: "Another EntireIO trailer present in commit", + message: "this is some commit message\n\nEntire-Metadata: ab123cdefg12", + wantTools: []string{"EntireIO"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "Another EntireIO trailer present in commit with CRLF line endings", - message: "this is some commit message\r\n\r\nEntire-Metadata: ab123cdefg12", - wantTools: []string{"EntireIO"}, + name: "Another EntireIO trailer present in commit with CRLF line endings", + message: "this is some commit message\r\n\r\nEntire-Metadata: ab123cdefg12", + wantTools: []string{"EntireIO"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "EntireIO trailer not used, only mentioned in a commit", - message: "this is a commit message with\nEntire-Metadata mentioned", - wantTools: nil, + name: "EntireIO trailer not used, only 
mentioned in a commit", + message: "this is a commit message with\nEntire-Metadata mentioned", + wantTools: nil, + wantConfidence: nil, }, { - name: "no patterns", - message: "normal commit message with no AI signatures", - wantTools: nil, + name: "Replit Agent trailer present in a commit", + message: "this is a commit message with\nReplit-Commit-Author: Agent", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "aider in middle of message not prefix", - message: "fix the aider: integration test", - wantTools: nil, + name: "Replit Agent trailer present in a commit with session id", + message: "this is a commit message with\nReplit-Commit-Author: Agent\nReplit-Commit-Session-Id: 1234a1ab-12ab-1234-abcd-0123456a1234", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceHigh}, }, { - name: "aider as substring of a word", - message: "raider: fix the tests", - wantTools: nil, + name: "Replit Assistant trailer present in a commit", + message: "this is a commit message with\nReplit-Commit-Author: Assistant", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceLow}, }, { - name: "empty message", - message: "", - wantTools: nil, + name: "Replit Assistant trailer present in a commit with session id", + message: "this is a commit message with\nReplit-Commit-Author: Assistant\nReplit-Commit-Session-Id: 1234a1ab-12ab-1234-abcd-0123456a1234", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, + }, + { + name: "Replit Agent trailer present in commit with CRLF line endings", + message: "this is some commit message\r\n\r\nReplit-Commit-Author: Agent", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, + }, + { + name: "Replit Assistant trailer present in commit with CRLF line endings", + message: "this is some commit 
message\r\n\r\nReplit-Commit-Author: Assistant", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceLow}, + }, + { + name: "Replit Agent trailer present in commit with another trailer with CRLF line endings", + message: "this is some commit message\r\n\r\nReplit-Commit-Author: Agent\r\nSomeOther: Trailer", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, + }, + { + name: "Replit Assistant trailer present in commit with another trailer with CRLF line endings", + message: "this is some commit message\r\n\r\nReplit-Commit-Author: Assistant\r\nSomeOther: Trailer", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceLow}, + }, + { + name: "Some other Replit product trailer (not agent or asst) present in a commit", + message: "this is a commit message with\nReplit-Commit-Author: SomeOtherReplitProduct", + wantTools: nil, + wantConfidence: nil, + }, + { + name: "Replit trailer not used, only mentioned in a commit", + message: "this is a commit message with\nReplit-Commit-Author: Assistant mentioned", + wantTools: nil, + wantConfidence: nil, + }, + { + name: "no patterns", + message: "normal commit message with no AI signatures", + wantTools: nil, + wantConfidence: nil, + }, + { + name: "aider in middle of message not prefix", + message: "fix the aider: integration test", + wantTools: nil, + wantConfidence: nil, + }, + { + name: "aider as substring of a word", + message: "raider: fix the tests", + wantTools: nil, + wantConfidence: nil, + }, + { + name: "empty message", + message: "", + wantTools: nil, + wantConfidence: nil, }, } @@ -79,11 +152,11 @@ func TestDetect(t *testing.T) { t.Run(tt.name, func(t *testing.T) { findings := d.Detect(detection.Input{CommitMessage: tt.message}) gotTools := make([]string, len(findings)) + gotConfidence := make([]detection.Confidence, len(findings)) for i, f := range findings { gotTools[i] = f.Tool - if 
f.Confidence != detection.ConfidenceMedium { - t.Errorf("confidence = %d, want %d", f.Confidence, detection.ConfidenceMedium) - } + gotConfidence[i] = f.Confidence + if f.Detector != "message" { t.Errorf("detector = %q, want %q", f.Detector, "message") } @@ -92,7 +165,6 @@ func TestDetect(t *testing.T) { if len(gotTools) == 0 { gotTools = nil } - if len(gotTools) != len(tt.wantTools) { t.Errorf("tools = %v, want %v", gotTools, tt.wantTools) return @@ -103,6 +175,20 @@ func TestDetect(t *testing.T) { return } } + + if len(gotConfidence) == 0 { + gotConfidence = nil + } + if len(gotConfidence) != len(tt.wantConfidence) { + t.Errorf("confidence = %v, want %v", gotConfidence, tt.wantConfidence) + return + } + for i := range gotConfidence { + if gotConfidence[i] != tt.wantConfidence[i] { + t.Errorf("confidence = %v, want %v", gotConfidence, tt.wantConfidence) + return + } + } }) } }