SCE-Development · vineeshah · Apr 8, 2026 · Apr 8, 2026 · Apr 9, 2026 · Apr 9, 2026
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,2 @@
+private-key.pem
+*.pem
diff --git a/Dockerfile b/Dockerfile
@@ -1,6 +1,9 @@
 # Use Python 3.11 slim image
 FROM python:3.11-slim
 
+# Disable Python output buffering so logs appear immediately
+ENV PYTHONUNBUFFERED=1
+
 # Set working directory
 WORKDIR /app
 

diff --git a/agent.mjs b/agent.mjs
@@ -0,0 +1,161 @@
+import { generateText, tool, stepCountIs } from 'ai';
+import { anthropic } from '@ai-sdk/anthropic';
+import { openai } from '@ai-sdk/openai';
+import { google } from '@ai-sdk/google';
+import { z } from 'zod';
+import { spawnSync } from 'child_process';
+import { readFileSync } from 'fs';
+
+function runBash(command, timeout = 30000) {
+  const safeEnv = {
+    PATH: process.env.PATH || '/usr/bin:/bin:/usr/local/bin',
+    HOME: process.env.HOME || '/root',
+    LANG: process.env.LANG || 'en_US.UTF-8',
+  };
+
+  const result = spawnSync('sh', ['-c', command], {
+    cwd: '/tmp/repo',
+    env: safeEnv,
+    timeout,
+    encoding: 'utf8',
+  });
+
+  if (result.error?.code === 'ETIMEDOUT') return `ERROR: Command timed out after ${timeout / 1000}s`;
+  if (result.error) return `ERROR: ${result.error.message}`;
+
+  let output = result.stdout || '';
+  if (result.stderr) output += `\n[stderr]: ${result.stderr}`;
+  if (!output.trim()) return `[exit code ${result.status}, no output]`;
+  if (output.length > 8000) output = output.slice(0, 8000) + `\n...[truncated, ${output.length} total chars]`;
+  return output;
+}
+
+// System prompt for the review agent. It instructs the model to explore the
+// cloned repository through the bash tool before answering, and to reply with
+// nothing but a JSON object holding at most three findings. The text is sent
+// verbatim as the `system` argument of generateText().
+const SYSTEM_PROMPT = `You are a senior software engineer doing a thorough review of a pull request.
+
+The repository is already cloned at the repo root. You have full bash access — use it liberally, there is no cost to running many commands.
+
+You MUST do all of the following before forming any conclusions:
+1. Read each changed file in full, not just the diff
+2. Find every caller and usage of any modified function, class, or symbol across the entire repo
+3. Read related files — tests, configs, dependent modules, anything that could be affected
+4. Check for edge cases: error handling, concurrency, security, null/undefined, type mismatches
+5. Run any additional commands needed to fully understand the impact
+
+Use as many bash calls as you need. Do not cut corners.
+
+Only after thorough exploration, return at most 3 findings as JSON — no other text. Focus on real bugs, security issues, or broken logic. Skip style nits.
+
+{
+  "findings": [
+    {
+      "file": "path/to/file.py",
+      "line": 123,
+      "severity": "critical" | "high" | "medium" | "low",
+      "message": "..."
+    },
+    {
+      "severity": "medium",
+      "message": "Overall: ..."
+    }
+  ]
+}
+
+If there are no significant issues, return {"findings": []}.`;
+
+function getModel(modelId) {
+  if (modelId.startsWith('gpt-') || modelId.startsWith('o1') || modelId.startsWith('o3') || modelId.startsWith('o4')) {
+    return openai(modelId);
+  }
+  if (modelId.startsWith('gemini-')) {
+    return google(modelId);
+  }
+  return anthropic(modelId);
+}
+
+async function main() {
+  const repoName = process.env.REPO;
+  const branchName = process.env.BRANCH || 'main';
+  const githubToken = process.env.GITHUB_TOKEN;
+  const maxSteps = parseInt(process.env.MAX_TOOL_CALLS || '10');
+  const modelId = process.env.MODEL || 'gemini-2.5-flash';
+
+  if (!repoName || !githubToken) {
+    console.log(JSON.stringify({ error: 'Missing required environment variables', findings: [] }));
+    process.exit(0);
+  }
+
+  // Clone the repository
+  process.stderr.write('Cloning repository...\n');
+  const cloneDir = '/tmp/repo';
+  const cloneUrl = `https://${githubToken}@github.com/${repoName}.git`;
+
+  spawnSync('rm', ['-rf', cloneDir]);
+  const cloneResult = spawnSync(
+    'git',
+    ['clone', '--depth=1', '--branch', branchName, cloneUrl, cloneDir],
+    { encoding: 'utf8', env: { ...process.env, GIT_TERMINAL_PROMPT: '0' } }
+  );
+
+  if (cloneResult.status !== 0) {
+    console.log(JSON.stringify({ error: `Failed to clone: ${cloneResult.stderr || 'unknown error'}`, findings: [] }));
+    process.exit(0);
+  }
+  process.stderr.write(`Cloned ${repoName} branch ${branchName} (shallow)\n`);
+
+  // Strip token from git config to prevent exfiltration via bash tool
+  spawnSync('git', ['remote', 'set-url', 'origin', `https://github.com/${repoName}.git`], {
+    cwd: cloneDir, encoding: 'utf8',
+  });
+
+  // Read PR diff
+  let prDiff = '';
+  try {
+    prDiff = readFileSync('/app/pr.diff', 'utf8');
+  } catch (e) {
+    process.stderr.write(`Warning: Could not read PR diff (${e.message})\n`);
+  }
+
+  process.stderr.write(`Running agent with ${modelId}...\n`);
+
+  try {
+    let stepCount = 0;
+
+    const { text } = await generateText({
+      model: getModel(modelId),
+      system: SYSTEM_PROMPT,
+      prompt: `Please review this pull request:\n\n${prDiff}`,
+      stopWhen: stepCountIs(maxSteps),
+      tools: {
+        bash: tool({
+          description: 'Run a shell command in the repository root.',
+          inputSchema: z.object({ command: z.string() }),
+          execute: async ({ command }) => {
+            stepCount++;
+            process.stderr.write(`Tool call ${stepCount}: bash(${JSON.stringify({ command })})\n`);
+            return runBash(command);
+          },
+        }),
+      },
+    });
+
+    // Strip markdown fences if present
+    let cleaned = text.trim();
+    if (cleaned.startsWith('```')) {
+      const lines = cleaned.split('\n').slice(1);
+      if (lines.at(-1)?.trim() === '```') lines.pop();
+      cleaned = lines.join('\n').trim();
+    }
+
+    const output = JSON.parse(cleaned);
+    if (!Array.isArray(output.findings)) output.findings = [];
+    output.findings = output.findings.slice(0, 3);
+
+    console.log(JSON.stringify(output));
+    process.exit(0);
+  } catch (e) {
+    console.log(JSON.stringify({ error: `Agent failed: ${e.message}`, findings: [] }));
+    process.exit(0);
+  }
+}
+
+main();
diff --git a/agent.py b/agent.py
@@ -0,0 +1,210 @@
+import os
+import subprocess
+import json
+import sys
+from anthropic import Anthropic
+
+
+def run_bash(command: str, timeout: int = 30) -> str:
+    """Run a shell command in the cloned repo directory with secrets stripped from env."""
+    try:
+        # Minimal env — no secrets accessible to shell commands (prevents prompt injection exfiltration)
+        safe_env = {
+            "PATH": os.environ.get("PATH", "/usr/bin:/bin:/usr/local/bin"),
+            "HOME": os.environ.get("HOME", "/root"),
+            "LANG": os.environ.get("LANG", "en_US.UTF-8"),
+        }
+        result = subprocess.run(
+            command, shell=True, capture_output=True,
+            text=True, timeout=timeout, cwd='/tmp/repo',
+            env=safe_env
+        )
+        output = result.stdout
+        if result.stderr:
+            output += f"\n[stderr]: {result.stderr}"
+        if not output.strip():
+            return f"[exit code {result.returncode}, no output]"
+        if len(output) > 8000:
+            output = output[:8000] + f"\n...[truncated, {len(output)} total chars]"
+        return output
+    except subprocess.TimeoutExpired:
+        return f"ERROR: Command timed out after {timeout}s"
+    except Exception as e:
+        return f"ERROR: {str(e)}"
+
+
+# Tool definitions sent with each agent request: a single "bash" tool whose
+# input is an object with one required string field, "command".
+TOOLS = [
+    {
+        "name": "bash",
+        "description": "Run a shell command in the repository root.",
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "command": {"type": "string", "description": "The shell command to run"}
+            },
+            "required": ["command"]
+        }
+    }
+]
+
+# System prompt for the review agent. It asks the model to use the bash tool
+# to inspect the cloned repository and to answer with nothing but a JSON
+# object holding at most three findings, each either tied to a file and line
+# or given as an overall assessment.
+SYSTEM_PROMPT = """You are a senior software engineer reviewing a pull request.
+
+The repository is already cloned and your working directory is the repo root. You have a bash tool with full shell access — use it however you see fit to understand the changes and their impact.
+
+Return at most 3 findings as JSON — no other text. Each finding can be an inline comment (specific file + line) or an overall assessment (no file/line). Use whichever makes more sense for each issue.
+
+{
+  "findings": [
+    {
+      "file": "path/to/file.py",
+      "line": 123,
+      "severity": "critical" | "high" | "medium" | "low",
+      "message": "..."
+    },
+    {
+      "severity": "medium",
+      "message": "Overall: ..."
+    }
+  ]
+}
+
+If there are no significant issues, return {"findings": []}.
+"""
+
+
+def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str:
+    """Run the agentic loop using Anthropic's tool use API."""
+    messages = [
+        {
+            "role": "user",
+            "content": f"Please review this pull request:\n\n{pr_diff}"
+        }
+    ]
+    tool_call_count = 0
+
+    while True:
+        response = client.messages.create(
+            model="claude-haiku-4-5-20251001",
+            max_tokens=4096,
+            system=SYSTEM_PROMPT,
+            tools=TOOLS,
+            messages=messages
+        )
+
+        messages.append({"role": "assistant", "content": response.content})
+
+        if response.stop_reason == "end_turn":
+            print(f"end_turn content: {response.content}", file=sys.stderr)
+            for block in response.content:
+                if hasattr(block, 'text') and block.text.strip():
+                    return block.text
+            return ""
+
+        if response.stop_reason == "tool_use":
+            tool_results = []
+            for block in response.content:
+                if block.type == "tool_use":
+                    tool_call_count += 1
+                    print(f"Tool call {tool_call_count}/{max_tool_calls}: {block.name}({json.dumps(block.input)})", file=sys.stderr)
+                    result = run_bash(block.input["command"]) if block.name == "bash" else f"Unknown tool: {block.name}"
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": block.id,
+                        "content": result
+                    })
+
+            messages.append({"role": "user", "content": tool_results})
+
+            if tool_call_count >= max_tool_calls:
+                print(f"Tool call limit ({max_tool_calls}) reached, requesting final answer.", file=sys.stderr)
+                messages.append({"role": "user", "content": "You've used the maximum number of tool calls. Based on everything you've seen, provide your final review as JSON now."})
+                response = client.messages.create(
+                    model="claude-haiku-4-5-20251001",
+                    max_tokens=4096,
+                    system=SYSTEM_PROMPT,
+                    messages=messages
+                )
+                print(f"Final response stop_reason={response.stop_reason} content={response.content}", file=sys.stderr)
+                for block in response.content:
+                    if hasattr(block, 'text') and block.text.strip():
+                        return block.text
+                return ""
+        else:
+            break
+
+    return ""
+
+
+def main():
+    repo_name = os.environ.get('REPO')
+    commit_sha = os.environ.get('COMMIT_SHA')
+    branch_name = os.environ.get('BRANCH', 'main')
+    anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')
+    github_token = os.environ.get('GITHUB_TOKEN')
+    max_tool_calls = int(os.environ.get('MAX_TOOL_CALLS', '10'))
+
+    if not all([repo_name, commit_sha, anthropic_api_key, github_token]):
+        print(json.dumps({"error": "Missing required environment variables", "findings": []}))
+        sys.exit(1)
+
+    # Clone the repository at the PR commit
+    print("Cloning repository...", file=sys.stderr)
+    clone_dir = '/tmp/repo'
+    clone_url = f"https://{github_token}@github.com/{repo_name}.git"
+
+    try:
+        subprocess.run(['rm', '-rf', clone_dir], check=True)
+        subprocess.run(
+            ['git', 'clone', '--depth=1', '--branch', branch_name, clone_url, clone_dir],
+            check=True, capture_output=True,
+            env={**os.environ, 'GIT_TERMINAL_PROMPT': '0'}
+        )
+        print(f"Cloned {repo_name} branch {branch_name} (shallow)", file=sys.stderr)
+    except subprocess.CalledProcessError as e:
+        print(json.dumps({"error": f"Failed to clone: {e.stderr.decode() if e.stderr else str(e)}", "findings": []}))
+        sys.exit(1)
+
+    os.chdir(clone_dir)
+
+    # Read PR diff from file written by server.py
+    try:
+        with open('/app/pr.diff', 'r', encoding='utf-8') as f:
+            pr_diff = f.read()
+    except Exception as e:
+        print(f"Warning: Could not read PR diff ({e})", file=sys.stderr)
+        pr_diff = ""
+
+    # Run the agentic review
+    print("Running agent...", file=sys.stderr)
+    client = Anthropic(api_key=anthropic_api_key)
+
+    try:
+        final_response = run_agent(client, pr_diff, max_tool_calls)
+
+        if not final_response:
+            raise ValueError("No response from agent")
+
+        # Strip markdown fences if present
+        cleaned = final_response.strip()
+        if cleaned.startswith('```'):
+            lines = cleaned.split('\n')[1:]
+            if lines and lines[-1].strip() == '```':
+                lines = lines[:-1]
+            cleaned = '\n'.join(lines).strip()
+
+        output = json.loads(cleaned)
+        if not isinstance(output.get('findings'), list):
+            output['findings'] = []
+        output['findings'] = output['findings'][:3]
+
+        print(json.dumps(output))
+        sys.exit(0)
+
+    except json.JSONDecodeError as e:
+        print(json.dumps({"error": f"Invalid JSON from agent: {str(e)}", "findings": []}))
+    except Exception as e:
+        print(json.dumps({"error": f"Agent failed: {str(e)}", "findings": []}))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,5 @@
+e2b>=1.0.0
+anthropic>=0.25.0
 openai
 requests
 python-dotenv