From b90a1f0530a464d0d4a7cc85fc5bdec58052a5ef Mon Sep 17 00:00:00 2001 From: Omkar P <45419097+omkar-foss@users.noreply.github.com> Date: Tue, 24 Mar 2026 14:49:28 +0530 Subject: [PATCH 1/3] Detect Replit trailers in commit messages, use confidence levels Closes #23. Currently Replit has 2 main AI products producing git commits - Agent for building projects from scratch, and Assistant for extending existing projects. For more info, see: https://blog.replit.com/new-ai-assistant-announcement Replit adds the Replit-Commit-Author trailer to commits generated by either Assistant or Agent, where we have the following cases: - Agent commits always have the trailer, as all code in the commit is generated and owned by the agent - Assistant commits have the trailer when: (a) Assistant was used to summarize the commit message only or (b) Assistant was used to write all code in the commit or (c) Assistant wrote or changed only some code in the commit - Assistant commits don't have the trailer when the entire code and commit message are manual and committed using the Replit UI. So we can check and use the Replit-Commit-Author trailer as follows: - if it's Agent then medium confidence - if it's Assistant then low confidence Additionally, we increase (+1) confidence if Replit-Commit-Session-Id is present, which denotes that the commit was generated as part of an AI conversation or workflow. Signed-off-by: Omkar P <45419097+omkar-foss@users.noreply.github.com> --- detection/detection.go | 4 + detection/message/message.go | 44 ++++++-- detection/message/message_test.go | 166 ++++++++++++++++++++++-------- 3 files changed, 161 insertions(+), 53 deletions(-) diff --git a/detection/detection.go b/detection/detection.go index 04cacfd..f40c92f 100644 --- a/detection/detection.go +++ b/detection/detection.go @@ -22,6 +22,10 @@ func (c Confidence) String() string { } } +func (c *Confidence) Increment() { + *c = min(*c+1, ConfidenceHigh) +} + // Finding represents a single detection of AI involvement. 
type Finding struct { Detector string `json:"detector"` diff --git a/detection/message/message.go b/detection/message/message.go index d00f1c5..e12d173 100644 --- a/detection/message/message.go +++ b/detection/message/message.go @@ -2,29 +2,30 @@ package message import ( "fmt" + "regexp" "strings" "github.com/chaoss/ai-detection-action/detection" ) var commitMessagePatterns = []struct { - check func(string) bool + check func(string) (detection.Confidence, bool) name string }{ { - check: func(msg string) bool { - return strings.HasPrefix(strings.ToLower(msg), "aider:") + check: func(msg string) (detection.Confidence, bool) { + return detection.ConfidenceMedium, strings.HasPrefix(strings.ToLower(msg), "aider:") }, name: "Aider", }, { - check: func(msg string) bool { - return strings.Contains(msg, "Generated with Claude Code") + check: func(msg string) (detection.Confidence, bool) { + return detection.ConfidenceMedium, strings.Contains(msg, "Generated with Claude Code") }, name: "Claude Code", }, { - check: func(msg string) bool { + check: func(msg string) (detection.Confidence, bool) { trailers := []string{ "Entire-Metadata", "Entire-Metadata-Task", @@ -37,13 +38,36 @@ var commitMessagePatterns = []struct { } for _, trailer := range trailers { if strings.Contains(msg, fmt.Sprintf("\n%s:", trailer)) { - return true + return detection.ConfidenceMedium, true } } - return false + return detection.ConfidenceMedium, false }, name: "EntireIO", }, + { + check: func(msg string) (detection.Confidence, bool) { + confidence := detection.ConfidenceMedium + trailerRegex := regexp.MustCompile(`(?m)^Replit-Commit-Author:\s*(Agent|Assistant)(?:\r?\nReplit-Commit-Session-Id:\s*([a-fA-F0-9-]+))?(?:\r?\n|$)`) + + matchResult := trailerRegex.FindStringSubmatch(msg) + if len(matchResult) > 0 { + switch matchResult[1] { + case "Agent": + confidence = detection.ConfidenceMedium + case "Assistant": + confidence = detection.ConfidenceLow + } + // if commit session id also present, increase 
confidence + if matchResult[2] != "" { + confidence.Increment() + } + return confidence, true + } + return confidence, false + }, + name: "Replit", + }, } type Detector struct{} @@ -57,11 +81,11 @@ func (d *Detector) Detect(input detection.Input) []detection.Finding { var findings []detection.Finding for _, p := range commitMessagePatterns { - if p.check(input.CommitMessage) { + if confidence, isDetected := p.check(input.CommitMessage); isDetected { findings = append(findings, detection.Finding{ Detector: d.Name(), Tool: p.name, - Confidence: detection.ConfidenceMedium, + Confidence: confidence, Detail: fmt.Sprintf("commit message matches %s pattern", p.name), }) } diff --git a/detection/message/message_test.go b/detection/message/message_test.go index 2352a3b..bb934c3 100644 --- a/detection/message/message_test.go +++ b/detection/message/message_test.go @@ -9,69 +9,136 @@ import ( func TestDetect(t *testing.T) { d := &Detector{} tests := []struct { - name string - message string - wantTools []string + name string + message string + wantTools []string + wantConfidence []detection.Confidence }{ { - name: "aider prefix", - message: "aider: fix the login bug", - wantTools: []string{"Aider"}, + name: "aider prefix", + message: "aider: fix the login bug", + wantTools: []string{"Aider"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "aider prefix uppercase", - message: "Aider: refactor auth module", - wantTools: []string{"Aider"}, + name: "aider prefix uppercase", + message: "Aider: refactor auth module", + wantTools: []string{"Aider"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "Claude Code footer", - message: "Add user validation\n\nGenerated with Claude Code", - wantTools: []string{"Claude Code"}, + name: "Claude Code footer", + message: "Add user validation\n\nGenerated with Claude Code", + wantTools: []string{"Claude Code"}, + wantConfidence: 
[]detection.Confidence{detection.ConfidenceMedium}, }, { - name: "Claude Code footer with link", - message: "Add validation\n\nGenerated with Claude Code\nhttps://claude.ai", - wantTools: []string{"Claude Code"}, + name: "Claude Code footer with link", + message: "Add validation\n\nGenerated with Claude Code\nhttps://claude.ai", + wantTools: []string{"Claude Code"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "EntireIO trailer present in commit", - message: "this is some commit message\n\nEntire-Checkpoint: ab123cdefg12", - wantTools: []string{"EntireIO"}, + name: "EntireIO trailer present in commit", + message: "this is some commit message\n\nEntire-Checkpoint: ab123cdefg12", + wantTools: []string{"EntireIO"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "Another EntireIO trailer present in commit", - message: "this is some commit message\n\nEntire-Metadata: ab123cdefg12", - wantTools: []string{"EntireIO"}, + name: "Another EntireIO trailer present in commit", + message: "this is some commit message\n\nEntire-Metadata: ab123cdefg12", + wantTools: []string{"EntireIO"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "Another EntireIO trailer present in commit with CRLF line endings", - message: "this is some commit message\r\n\r\nEntire-Metadata: ab123cdefg12", - wantTools: []string{"EntireIO"}, + name: "Another EntireIO trailer present in commit with CRLF line endings", + message: "this is some commit message\r\n\r\nEntire-Metadata: ab123cdefg12", + wantTools: []string{"EntireIO"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "EntireIO trailer not used, only mentioned in a commit", - message: "this is a commit message with\nEntire-Metadata mentioned", - wantTools: nil, + name: "EntireIO trailer not used, only mentioned in a commit", + message: "this is a commit message with\nEntire-Metadata mentioned", + wantTools: 
nil, + wantConfidence: nil, }, { - name: "no patterns", - message: "normal commit message with no AI signatures", - wantTools: nil, + name: "Replit Agent trailer present in a commit", + message: "this is a commit message with\nReplit-Commit-Author: Agent", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, }, { - name: "aider in middle of message not prefix", - message: "fix the aider: integration test", - wantTools: nil, + name: "Replit Agent trailer present in a commit with session id", + message: "this is a commit message with\nReplit-Commit-Author: Agent\nReplit-Commit-Session-Id: 1234a1ab-12ab-1234-abcd-0123456a1234", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceHigh}, }, { - name: "aider as substring of a word", - message: "raider: fix the tests", - wantTools: nil, + name: "Replit Assistant trailer present in a commit", + message: "this is a commit message with\nReplit-Commit-Author: Assistant", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceLow}, }, { - name: "empty message", - message: "", - wantTools: nil, + name: "Replit Assistant trailer present in a commit with session id", + message: "this is a commit message with\nReplit-Commit-Author: Assistant\nReplit-Commit-Session-Id: 1234a1ab-12ab-1234-abcd-0123456a1234", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, + }, + { + name: "Replit Agent trailer present in commit with CRLF line endings", + message: "this is some commit message\r\n\r\nReplit-Commit-Author: Agent", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, + }, + { + name: "Replit Assistant trailer present in commit with CRLF line endings", + message: "this is some commit message\r\n\r\nReplit-Commit-Author: Assistant", + wantTools: []string{"Replit"}, + wantConfidence: 
[]detection.Confidence{detection.ConfidenceLow}, + }, + { + name: "Replit Agent trailer present in commit with another trailer with CRLF line endings", + message: "this is some commit message\r\n\r\nReplit-Commit-Author: Agent\r\nSomeOther: Trailer", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceMedium}, + }, + { + name: "Replit Assistant trailer present in commit with another trailer with CRLF line endings", + message: "this is some commit message\r\n\r\nReplit-Commit-Author: Assistant\r\nSomeOther: Trailer", + wantTools: []string{"Replit"}, + wantConfidence: []detection.Confidence{detection.ConfidenceLow}, + }, + { + name: "Replit trailer not used, only mentioned in a commit", + message: "this is a commit message with\nReplit-Commit-Author: Assistant mentioned", + wantTools: nil, + wantConfidence: nil, + }, + { + name: "no patterns", + message: "normal commit message with no AI signatures", + wantTools: nil, + wantConfidence: nil, + }, + { + name: "aider in middle of message not prefix", + message: "fix the aider: integration test", + wantTools: nil, + wantConfidence: nil, + }, + { + name: "aider as substring of a word", + message: "raider: fix the tests", + wantTools: nil, + wantConfidence: nil, + }, + { + name: "empty message", + message: "", + wantTools: nil, + wantConfidence: nil, }, } @@ -79,11 +146,11 @@ func TestDetect(t *testing.T) { t.Run(tt.name, func(t *testing.T) { findings := d.Detect(detection.Input{CommitMessage: tt.message}) gotTools := make([]string, len(findings)) + gotConfidence := make([]detection.Confidence, len(findings)) for i, f := range findings { gotTools[i] = f.Tool - if f.Confidence != detection.ConfidenceMedium { - t.Errorf("confidence = %d, want %d", f.Confidence, detection.ConfidenceMedium) - } + gotConfidence[i] = f.Confidence + if f.Detector != "message" { t.Errorf("detector = %q, want %q", f.Detector, "message") } @@ -92,7 +159,6 @@ func TestDetect(t *testing.T) { if len(gotTools) 
== 0 { gotTools = nil } - if len(gotTools) != len(tt.wantTools) { t.Errorf("tools = %v, want %v", gotTools, tt.wantTools) return @@ -103,6 +169,20 @@ func TestDetect(t *testing.T) { return } } + + if len(gotConfidence) == 0 { + gotConfidence = nil + } + if len(gotConfidence) != len(tt.wantConfidence) { + t.Errorf("confidence = %v, want %v", gotConfidence, tt.wantConfidence) + return + } + for i := range gotConfidence { + if gotConfidence[i] != tt.wantConfidence[i] { + t.Errorf("confidence = %v, want %v", gotConfidence, tt.wantConfidence) + return + } + } }) } } From 6739faf1e9634b944c424cb3979c8c78b2d9bc8d Mon Sep 17 00:00:00 2001 From: Omkar P <45419097+omkar-foss@users.noreply.github.com> Date: Tue, 24 Mar 2026 15:26:27 +0530 Subject: [PATCH 2/3] Return early, init confidence explicitly Signed-off-by: Omkar P <45419097+omkar-foss@users.noreply.github.com> --- detection/message/message.go | 35 ++++++++++++++++++------------- detection/message/message_test.go | 6 ++++++ 2 files changed, 27 insertions(+), 14 deletions(-) diff --git a/detection/message/message.go b/detection/message/message.go index e12d173..bc6897a 100644 --- a/detection/message/message.go +++ b/detection/message/message.go @@ -47,24 +47,31 @@ var commitMessagePatterns = []struct { }, { check: func(msg string) (detection.Confidence, bool) { - confidence := detection.ConfidenceMedium trailerRegex := regexp.MustCompile(`(?m)^Replit-Commit-Author:\s*(Agent|Assistant)(?:\r?\nReplit-Commit-Session-Id:\s*([a-fA-F0-9-]+))?(?:\r?\n|$)`) matchResult := trailerRegex.FindStringSubmatch(msg) - if len(matchResult) > 0 { - switch matchResult[1] { - case "Agent": - confidence = detection.ConfidenceMedium - case "Assistant": - confidence = detection.ConfidenceLow - } - // if commit session id also present, increase confidence - if matchResult[2] != "" { - confidence.Increment() - } - return confidence, true + if len(matchResult) == 0 { + // replit not detected + return detection.ConfidenceMedium, false + } + + var 
confidence detection.Confidence + switch matchResult[1] { + case "Agent": + confidence = detection.ConfidenceMedium + case "Assistant": + confidence = detection.ConfidenceLow + default: + // unknown replit product, we cannot confirm ai use + return detection.ConfidenceLow, false } - return confidence, false + + // if commit session id also present, increase confidence + if matchResult[2] != "" { + confidence.Increment() + } + + return confidence, true }, name: "Replit", }, diff --git a/detection/message/message_test.go b/detection/message/message_test.go index bb934c3..3c79655 100644 --- a/detection/message/message_test.go +++ b/detection/message/message_test.go @@ -110,6 +110,12 @@ func TestDetect(t *testing.T) { wantTools: []string{"Replit"}, wantConfidence: []detection.Confidence{detection.ConfidenceLow}, }, + { + name: "Some other Replit product trailer (not agent or asst) present in a commit", + message: "this is a commit message with\nReplit-Commit-Author: SomeOtherReplitProduct", + wantTools: nil, + wantConfidence: nil, + }, { name: "Replit trailer not used, only mentioned in a commit", message: "this is a commit message with\nReplit-Commit-Author: Assistant mentioned", From f16eac8ace4accd0e5c61a6a8f75fb909cd9b92a Mon Sep 17 00:00:00 2001 From: Omkar P <45419097+omkar-foss@users.noreply.github.com> Date: Fri, 10 Apr 2026 12:21:00 +0530 Subject: [PATCH 3/3] Update README to mention ai session id trailers Signed-off-by: Omkar P <45419097+omkar-foss@users.noreply.github.com> --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1417902..a0e6196 100644 --- a/README.md +++ b/README.md @@ -11,11 +11,12 @@ Four detectors run against each commit, each producing findings at a confidence **High confidence** -- strong signals that an AI tool authored or co-authored the commit: - Known AI bot committer emails (Claude, Copilot, Cursor, Codex, Gemini Code Assist, Amazon Q, Devin, Cline, Continue.dev, Cody, JetBrains 
AI, CodeRabbit). Also matches on the numeric prefix of GitHub noreply emails, so bot username renames don't break detection. - `Co-Authored-By` trailers with known AI tool emails (Claude Code, Cursor, Aider). +- AI session ID trailers (such as `Replit-Commit-Session-Id`) combined with other known commit trailers, indicating that the commit was generated as part of an AI conversation or workflow. **Medium confidence** -- patterns in the commit message itself: - `aider:` prefix (Aider's default commit format). - `Generated with Claude Code` footer. -- Known commit trailers in formats unique to specific tools (such as EntireIO and Replit) that can contain values indicative of AI use +- Known commit trailers in formats unique to specific tools (such as EntireIO and Replit Agent/Assistant) that can contain values indicative of AI use. **Low confidence** -- mentions of AI tool names in text: