Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
private-key.pem
*.pem
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Use Python 3.11 slim image
FROM python:3.11-slim

# Disable Python output buffering so logs appear immediately
ENV PYTHONUNBUFFERED=1

# Set working directory
WORKDIR /app

Expand Down
161 changes: 161 additions & 0 deletions agent.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
import { generateText, tool, stepCountIs } from 'ai';
import { anthropic } from '@ai-sdk/anthropic';
import { openai } from '@ai-sdk/openai';
import { google } from '@ai-sdk/google';
import { z } from 'zod';
import { spawnSync } from 'child_process';
import { readFileSync } from 'fs';

/**
 * Run a shell command in the cloned repository and return its output as text.
 *
 * The command is executed with `sh -c` in `/tmp/repo`. Only PATH, HOME and
 * LANG are forwarded to the child, so other variables in `process.env` are
 * not visible to the command.
 *
 * @param {string} command - Shell command line to execute.
 * @param {number} [timeout=30000] - Maximum run time in milliseconds.
 * @returns {string} stdout (plus a `[stderr]:` section when stderr is
 *   non-empty), truncated to 8000 characters; a `[exit code N, no output]`
 *   marker when nothing was printed; or an `ERROR: ...` string on timeout or
 *   spawn failure. Never throws for command failures.
 */
function runBash(command, timeout = 30000) {
  const maxOutputChars = 8000;
  // spawnSync defaults to a 1 MiB capture limit and fails with ENOBUFS beyond
  // it. Raise the limit so ordinary "large" outputs reach the truncation
  // logic below instead of turning into an opaque error.
  const maxCaptureBytes = 16 * 1024 * 1024;

  const safeEnv = {
    PATH: process.env.PATH || '/usr/bin:/bin:/usr/local/bin',
    HOME: process.env.HOME || '/root',
    LANG: process.env.LANG || 'en_US.UTF-8',
  };

  const result = spawnSync('sh', ['-c', command], {
    cwd: '/tmp/repo',
    env: safeEnv,
    timeout,
    encoding: 'utf8',
    maxBuffer: maxCaptureBytes,
  });

  // Even with the larger buffer a command can overflow it; in that case keep
  // whatever was captured rather than discarding it.
  const overflowed = result.error?.code === 'ENOBUFS';

  if (result.error?.code === 'ETIMEDOUT') return `ERROR: Command timed out after ${timeout / 1000}s`;
  if (result.error && !overflowed) return `ERROR: ${result.error.message}`;

  let output = result.stdout || '';
  if (result.stderr) output += `\n[stderr]: ${result.stderr}`;
  if (!output.trim()) return `[exit code ${result.status}, no output]`;

  if (output.length > maxOutputChars) {
    // When the capture itself overflowed, the true size is unknown.
    const sizeNote = overflowed
      ? `more than ${output.length} chars`
      : `${output.length} total chars`;
    output = `${output.slice(0, maxOutputChars)}\n...[truncated, ${sizeNote}]`;
  }
  return output;
}

// System prompt handed to generateText() in main(). It tells the model to
// explore the repository with the bash tool and then answer with JSON only,
// in the { findings: [...] } shape that main() parses and caps at 3 entries.
const SYSTEM_PROMPT = `You are a senior software engineer doing a thorough review of a pull request.

The repository is already cloned at the repo root. You have full bash access — use it liberally, there is no cost to running many commands.

You MUST do all of the following before forming any conclusions:
1. Read each changed file in full, not just the diff
2. Find every caller and usage of any modified function, class, or symbol across the entire repo
3. Read related files — tests, configs, dependent modules, anything that could be affected
4. Check for edge cases: error handling, concurrency, security, null/undefined, type mismatches
5. Run any additional commands needed to fully understand the impact

Use as many bash calls as you need. Do not cut corners.

Only after thorough exploration, return at most 3 findings as JSON — no other text. Focus on real bugs, security issues, or broken logic. Skip style nits.

{
"findings": [
{
"file": "path/to/file.py",
"line": 123,
"severity": "critical" | "high" | "medium" | "low",
"message": "..."
},
{
"severity": "medium",
"message": "Overall: ..."
}
]
}

If there are no significant issues, return {"findings": []}.`;

/**
 * Pick the provider model handle for a model identifier based on its prefix.
 *
 * Identifiers starting with `gpt-`, `o1`, `o3` or `o4` go to OpenAI, those
 * starting with `gemini-` go to Google, and everything else goes to Anthropic.
 *
 * @param {string} modelId - Model identifier, e.g. `gemini-2.5-flash`.
 * @returns The value returned by the matching provider factory.
 */
function getModel(modelId) {
  const openaiPrefixes = ['gpt-', 'o1', 'o3', 'o4'];
  const isOpenAi = openaiPrefixes.some((prefix) => modelId.startsWith(prefix));
  if (isOpenAi) return openai(modelId);

  return modelId.startsWith('gemini-') ? google(modelId) : anthropic(modelId);
}

/**
 * Clone the target repository, run the review agent over the PR diff, and
 * print one JSON document (`{ findings, error? }`) to stdout.
 *
 * Reads REPO, BRANCH, GITHUB_TOKEN, MAX_TOOL_CALLS and MODEL from the
 * environment and the diff from `/app/pr.diff`. Progress messages go to
 * stderr so stdout carries only the JSON result. Every handled path —
 * success or failure — exits with status 0 and reports problems through the
 * `error` field.
 */
async function main() {
  const repoName = process.env.REPO;
  const branchName = process.env.BRANCH || 'main';
  const githubToken = process.env.GITHUB_TOKEN;
  const modelId = process.env.MODEL || 'gemini-2.5-flash';

  // A non-numeric or non-positive MAX_TOOL_CALLS would otherwise reach
  // stepCountIs() as NaN/0; fall back to the default of 10 instead.
  const defaultMaxSteps = 10;
  const parsedMaxSteps = Number.parseInt(process.env.MAX_TOOL_CALLS || String(defaultMaxSteps), 10);
  const maxSteps = Number.isInteger(parsedMaxSteps) && parsedMaxSteps > 0 ? parsedMaxSteps : defaultMaxSteps;

  if (!repoName || !githubToken) {
    console.log(JSON.stringify({ error: 'Missing required environment variables', findings: [] }));
    process.exit(0);
  }

  // Clone the repository
  process.stderr.write('Cloning repository...\n');
  const cloneDir = '/tmp/repo';
  const cloneUrl = `https://${githubToken}@github.com/${repoName}.git`;

  spawnSync('rm', ['-rf', cloneDir]);
  const cloneResult = spawnSync(
    'git',
    ['clone', '--depth=1', '--branch', branchName, cloneUrl, cloneDir],
    { encoding: 'utf8', env: { ...process.env, GIT_TERMINAL_PROMPT: '0' } }
  );

  if (cloneResult.status !== 0) {
    // git error text can echo the remote URL, which embeds the token. Redact
    // it before the message is written to stdout.
    const rawReason = cloneResult.stderr || cloneResult.error?.message || 'unknown error';
    const safeReason = rawReason.replaceAll(githubToken, '***');
    console.log(JSON.stringify({ error: `Failed to clone: ${safeReason}`, findings: [] }));
    process.exit(0);
  }
  process.stderr.write(`Cloned ${repoName} branch ${branchName} (shallow)\n`);

  // Strip token from git config to prevent exfiltration via bash tool
  spawnSync('git', ['remote', 'set-url', 'origin', `https://github.com/${repoName}.git`], {
    cwd: cloneDir, encoding: 'utf8',
  });

  // Read PR diff; a missing diff is not fatal, the agent then runs with an empty one.
  let prDiff = '';
  try {
    prDiff = readFileSync('/app/pr.diff', 'utf8');
  } catch (e) {
    process.stderr.write(`Warning: Could not read PR diff (${e.message})\n`);
  }

  process.stderr.write(`Running agent with ${modelId}...\n`);

  try {
    let stepCount = 0;

    const { text } = await generateText({
      model: getModel(modelId),
      system: SYSTEM_PROMPT,
      prompt: `Please review this pull request:\n\n${prDiff}`,
      stopWhen: stepCountIs(maxSteps),
      tools: {
        bash: tool({
          description: 'Run a shell command in the repository root.',
          inputSchema: z.object({ command: z.string() }),
          execute: async ({ command }) => {
            stepCount++;
            process.stderr.write(`Tool call ${stepCount}: bash(${JSON.stringify({ command })})\n`);
            return runBash(command);
          },
        }),
      },
    });

    // Strip markdown fences if present: drop the opening ``` line and a
    // trailing ``` line so only the JSON payload remains.
    let cleaned = text.trim();
    if (cleaned.startsWith('```')) {
      const lines = cleaned.split('\n').slice(1);
      if (lines.at(-1)?.trim() === '```') lines.pop();
      cleaned = lines.join('\n').trim();
    }

    // Normalise the result: findings is always an array of at most 3 entries.
    const output = JSON.parse(cleaned);
    if (!Array.isArray(output.findings)) output.findings = [];
    output.findings = output.findings.slice(0, 3);

    console.log(JSON.stringify(output));
    process.exit(0);
  } catch (e) {
    // Covers model/API failures as well as unparseable model output.
    console.log(JSON.stringify({ error: `Agent failed: ${e.message}`, findings: [] }));
    process.exit(0);
  }
}

// Script entry point: start the review as soon as the module is executed.
main();
210 changes: 210 additions & 0 deletions agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
import os
import subprocess
import json
import sys
from anthropic import Anthropic


def run_bash(command: str, timeout: int = 30) -> str:
"""Run a shell command in the cloned repo directory with secrets stripped from env."""
try:
# Minimal env — no secrets accessible to shell commands (prevents prompt injection exfiltration)
safe_env = {
"PATH": os.environ.get("PATH", "/usr/bin:/bin:/usr/local/bin"),
"HOME": os.environ.get("HOME", "/root"),
"LANG": os.environ.get("LANG", "en_US.UTF-8"),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[HIGH] Security concern: Passing GITHUB_TOKEN via environment variable to subprocess with shell=True (line 18) in run_bash() is a vulnerability vector. Although safe_env is used to strip most env vars, the function signature doesn't prevent callers from modifying env. Additionally, running arbitrary git/bash commands with token access could enable exfiltration despite the stated intent to prevent prompt injection.

}
result = subprocess.run(
command, shell=True, capture_output=True,
text=True, timeout=timeout, cwd='/tmp/repo',
env=safe_env
)
output = result.stdout
if result.stderr:
output += f"\n[stderr]: {result.stderr}"
if not output.strip():
return f"[exit code {result.returncode}, no output]"
if len(output) > 8000:
output = output[:8000] + f"\n...[truncated, {len(output)} total chars]"
return output
except subprocess.TimeoutExpired:
return f"ERROR: Command timed out after {timeout}s"
except Exception as e:
return f"ERROR: {str(e)}"


# Tool definitions passed as `tools` to client.messages.create() in run_agent().
# A single tool, "bash", is exposed; it takes one required string field,
# "command", which run_agent() forwards to run_bash().
TOOLS = [
{
"name": "bash",
"description": "Run a shell command in the repository root.",
"input_schema": {
"type": "object",
"properties": {
"command": {"type": "string", "description": "The shell command to run"}
},
"required": ["command"]
}
}
]

# System prompt sent with every client.messages.create() call in run_agent().
# It asks for a JSON-only reply in the {"findings": [...]} shape that main()
# parses and truncates to three entries.
SYSTEM_PROMPT = """You are a senior software engineer reviewing a pull request.

The repository is already cloned and your working directory is the repo root. You have a bash tool with full shell access — use it however you see fit to understand the changes and their impact.

Return at most 3 findings as JSON — no other text. Each finding can be an inline comment (specific file + line) or an overall assessment (no file/line). Use whichever makes more sense for each issue.

{
"findings": [
{
"file": "path/to/file.py",
"line": 123,
"severity": "critical" | "high" | "medium" | "low",
"message": "..."
},
{
"severity": "medium",
"message": "Overall: ..."
}
]
}

If there are no significant issues, return {"findings": []}.
"""


def _first_text(content) -> str:
    """Return the text of the first non-blank text block in `content`, or ""."""
    for block in content:
        if hasattr(block, 'text') and block.text.strip():
            return block.text
    return ""


def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str:
    """Run the agentic loop using Anthropic's tool use API.

    The model is called repeatedly; each `tool_use` block it emits is executed
    through run_bash() and the results are fed back, until the model ends its
    turn or `max_tool_calls` tool calls have been made. Once the limit is hit,
    one last request asks for the final JSON answer with tool use disabled.

    Args:
        client: Anthropic client used for `messages.create` calls.
        pr_diff: Diff text embedded in the first user message.
        max_tool_calls: Number of tool calls after which the final answer is
            requested. The limit is checked after each response, so a response
            containing several tool calls can exceed it.

    Returns:
        The first non-blank text block of the final response, or "" when there
        is none or the model stops for a reason other than `end_turn` /
        `tool_use`.
    """
    model = "claude-haiku-4-5-20251001"
    max_tokens = 4096
    messages = [
        {
            "role": "user",
            "content": f"Please review this pull request:\n\n{pr_diff}"
        }
    ]
    tool_call_count = 0

    while True:
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages
        )

        messages.append({"role": "assistant", "content": response.content})

        if response.stop_reason == "end_turn":
            print(f"end_turn content: {response.content}", file=sys.stderr)
            return _first_text(response.content)

        if response.stop_reason != "tool_use":
            # Any other stop reason ends the loop with an empty result.
            break

        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                tool_call_count += 1
                print(f"Tool call {tool_call_count}/{max_tool_calls}: {block.name}({json.dumps(block.input)})", file=sys.stderr)
                result = run_bash(block.input["command"]) if block.name == "bash" else f"Unknown tool: {block.name}"
                tool_results.append({
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": result
                })

        if tool_call_count < max_tool_calls:
            messages.append({"role": "user", "content": tool_results})
            continue

        print(f"Tool call limit ({max_tool_calls}) reached, requesting final answer.", file=sys.stderr)
        # Send the tool results and the "answer now" instruction as ONE user
        # turn (tool_result blocks first) instead of two consecutive user
        # messages.
        messages.append({
            "role": "user",
            "content": tool_results + [{
                "type": "text",
                "text": "You've used the maximum number of tool calls. Based on everything you've seen, provide your final review as JSON now."
            }]
        })
        # The history contains tool_use/tool_result blocks, so the request must
        # still declare the tools; tool_choice "none" stops further tool calls.
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            tool_choice={"type": "none"},
            messages=messages
        )
        print(f"Final response stop_reason={response.stop_reason} content={response.content}", file=sys.stderr)
        return _first_text(response.content)

    return ""


def main():
    """Clone the repository, run the review agent, and print the result as JSON.

    Reads REPO, COMMIT_SHA, BRANCH, ANTHROPIC_API_KEY, GITHUB_TOKEN and
    MAX_TOOL_CALLS from the environment and the diff from /app/pr.diff.
    Progress goes to stderr; stdout receives a single JSON document of the form
    {"findings": [...]} with an "error" key on failure.

    Exits with status 1 when required variables are missing or the clone
    fails, and with status 0 on success. Agent/JSON failures are reported in
    the "error" field and the function then returns normally.
    """
    repo_name = os.environ.get('REPO')
    commit_sha = os.environ.get('COMMIT_SHA')
    branch_name = os.environ.get('BRANCH', 'main')
    anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')
    github_token = os.environ.get('GITHUB_TOKEN')

    # A malformed MAX_TOOL_CALLS should not crash the run with a traceback;
    # fall back to the default instead.
    try:
        max_tool_calls = int(os.environ.get('MAX_TOOL_CALLS', '10'))
    except ValueError:
        print("Warning: MAX_TOOL_CALLS is not an integer, using 10", file=sys.stderr)
        max_tool_calls = 10

    if not all([repo_name, commit_sha, anthropic_api_key, github_token]):
        print(json.dumps({"error": "Missing required environment variables", "findings": []}))
        sys.exit(1)

    # Shallow-clone the PR branch. COMMIT_SHA is required above but is not
    # used for the checkout; the tip of BRANCH is what gets reviewed.
    print("Cloning repository...", file=sys.stderr)
    clone_dir = '/tmp/repo'
    clone_url = f"https://{github_token}@github.com/{repo_name}.git"

    try:
        subprocess.run(['rm', '-rf', clone_dir], check=True)
        subprocess.run(
            ['git', 'clone', '--depth=1', '--branch', branch_name, clone_url, clone_dir],
            check=True, capture_output=True,
            env={**os.environ, 'GIT_TERMINAL_PROMPT': '0'}
        )
        # The clone URL (with the token) is stored in .git/config, which the
        # model's bash tool can read. Replace it with a token-free URL.
        subprocess.run(
            ['git', 'remote', 'set-url', 'origin', f"https://github.com/{repo_name}.git"],
            check=True, capture_output=True, cwd=clone_dir
        )
        print(f"Cloned {repo_name} branch {branch_name} (shallow)", file=sys.stderr)
    except subprocess.CalledProcessError as e:
        # Both git's stderr and str(e) (which includes the command line) can
        # contain the token-bearing URL; redact it before printing.
        detail = e.stderr.decode(errors='replace') if e.stderr else str(e)
        detail = detail.replace(github_token, '***')
        print(json.dumps({"error": f"Failed to clone: {detail}", "findings": []}))
        sys.exit(1)

    os.chdir(clone_dir)

    # Read PR diff from file written by server.py; a missing diff is not fatal.
    try:
        with open('/app/pr.diff', 'r', encoding='utf-8') as f:
            pr_diff = f.read()
    except Exception as e:
        print(f"Warning: Could not read PR diff ({e})", file=sys.stderr)
        pr_diff = ""

    # Run the agentic review
    print("Running agent...", file=sys.stderr)
    client = Anthropic(api_key=anthropic_api_key)

    try:
        final_response = run_agent(client, pr_diff, max_tool_calls)

        if not final_response:
            raise ValueError("No response from agent")

        # Strip markdown fences if present: drop the opening ``` line and a
        # trailing ``` line so only the JSON payload remains.
        cleaned = final_response.strip()
        if cleaned.startswith('```'):
            lines = cleaned.split('\n')[1:]
            if lines and lines[-1].strip() == '```':
                lines = lines[:-1]
            cleaned = '\n'.join(lines).strip()

        # Normalise the result: findings is always a list of at most 3 entries.
        output = json.loads(cleaned)
        if not isinstance(output.get('findings'), list):
            output['findings'] = []
        output['findings'] = output['findings'][:3]

        print(json.dumps(output))
        sys.exit(0)

    except json.JSONDecodeError as e:
        print(json.dumps({"error": f"Invalid JSON from agent: {str(e)}", "findings": []}))
    except Exception as e:
        print(json.dumps({"error": f"Agent failed: {str(e)}", "findings": []}))

if __name__ == '__main__':
main()
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
e2b>=1.0.0
anthropic>=0.25.0
openai
requests
python-dotenv
Expand Down
Loading