From b9f02ec44bb0aa6b47c55126ff21d68b2c08f2c8 Mon Sep 17 00:00:00 2001 From: vineeshah Date: Wed, 8 Apr 2026 12:55:25 -0700 Subject: [PATCH 1/5] feat : anthropic agent sdk on e2b --- agent.py | 209 +++++++++++++++++++++++++++++++++++++++++++++++ requirements.txt | 3 +- review.py | 146 ++++++++++++++++++++++++++++++++- server.py | 205 +++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 559 insertions(+), 4 deletions(-) create mode 100644 agent.py diff --git a/agent.py b/agent.py new file mode 100644 index 0000000..e5bbdcc --- /dev/null +++ b/agent.py @@ -0,0 +1,209 @@ +import os +import subprocess +import json +import sys +from anthropic import Anthropic + + +def run_bash(command: str, timeout: int = 30) -> str: + """Run a shell command in the cloned repo directory with secrets stripped from env.""" + try: + # Minimal env — no secrets accessible to shell commands (prevents prompt injection exfiltration) + safe_env = { + "PATH": os.environ.get("PATH", "/usr/bin:/bin:/usr/local/bin"), + "HOME": os.environ.get("HOME", "/root"), + "LANG": os.environ.get("LANG", "en_US.UTF-8"), + } + result = subprocess.run( + command, shell=True, capture_output=True, + text=True, timeout=timeout, cwd='/tmp/repo', + env=safe_env + ) + output = result.stdout + if result.stderr: + output += f"\n[stderr]: {result.stderr}" + if not output.strip(): + return f"[exit code {result.returncode}, no output]" + if len(output) > 8000: + output = output[:8000] + f"\n...[truncated, {len(output)} total chars]" + return output + except subprocess.TimeoutExpired: + return f"ERROR: Command timed out after {timeout}s" + except Exception as e: + return f"ERROR: {str(e)}" + + +TOOLS = [ + { + "name": "bash", + "description": "Run a shell command in the repository root.", + "input_schema": { + "type": "object", + "properties": { + "command": {"type": "string", "description": "The shell command to run"} + }, + "required": ["command"] + } + } +] + +SYSTEM_PROMPT = """You are a senior software engineer reviewing a pull request. + +The repository is already cloned and your working directory is the repo root. You have a bash tool with full shell access — use it however you see fit to understand the changes and their impact. + +Return at most 3 findings as JSON — no other text. Each finding can be an inline comment (specific file + line) or an overall assessment (no file/line). Use whichever makes more sense for each issue. + +{ + "findings": [ + { + "file": "path/to/file.py", + "line": 123, + "severity": "critical" | "high" | "medium" | "low", + "message": "..." + }, + { + "severity": "medium", + "message": "Overall: ..." + } + ] +} + +If there are no significant issues, return {"findings": []}. +""" + + +def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str: + """Run the agentic loop using Anthropic's tool use API.""" + messages = [ + { + "role": "user", + "content": f"Please review this pull request:\n\n{pr_diff}" + } + ] + tool_call_count = 0 + + while True: + response = client.messages.create( + model="claude-haiku-4-5", + max_tokens=4096, + system=SYSTEM_PROMPT, + tools=TOOLS, + messages=messages + ) + + messages.append({"role": "assistant", "content": response.content}) + + if response.stop_reason == "end_turn": + for block in response.content: + if hasattr(block, 'text'): + return block.text + return "" + + if response.stop_reason == "tool_use": + tool_results = [] + for block in response.content: + if block.type == "tool_use": + tool_call_count += 1 + print(f"Tool call {tool_call_count}/{max_tool_calls}: {block.name}({json.dumps(block.input)})", file=sys.stderr) + result = run_bash(block.input["command"]) if block.name == "bash" else f"Unknown tool: {block.name}" + tool_results.append({ + "type": "tool_result", + "tool_use_id": block.id, + "content": result + }) + + messages.append({"role": "user", "content": tool_results}) + + if tool_call_count >= max_tool_calls: + print(f"Tool call limit ({max_tool_calls}) reached, requesting final answer.", file=sys.stderr) + response = client.messages.create( + model="claude-haiku-4-5", + max_tokens=4096, + system=SYSTEM_PROMPT, + messages=messages + ) + for block in response.content: + if hasattr(block, 'text'): + return block.text + return "" + else: + break + + return "" + + +def main(): + repo_name = os.environ.get('REPO') + commit_sha = os.environ.get('COMMIT_SHA') + branch_name = os.environ.get('BRANCH', 'main') + anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY') + github_token = os.environ.get('GITHUB_TOKEN') + max_tool_calls = int(os.environ.get('MAX_TOOL_CALLS', '10')) + + if not all([repo_name, commit_sha, anthropic_api_key, github_token]): + print(json.dumps({"error": "Missing required environment variables", "findings": []})) + sys.exit(1) + + # Clone the repository at the PR commit + print("Cloning repository...", file=sys.stderr) + clone_dir = '/tmp/repo' + clone_url = f"https://{github_token}@github.com/{repo_name}.git" + + try: + subprocess.run(['rm', '-rf', clone_dir], check=True) + subprocess.run( + ['git', 'clone', '--depth=1', '--branch', branch_name, clone_url, clone_dir], + check=True, capture_output=True, + env={**os.environ, 'GIT_TERMINAL_PROMPT': '0'} + ) + print(f"Cloned {repo_name} branch {branch_name} (shallow)", file=sys.stderr) + except subprocess.CalledProcessError as e: + print(json.dumps({"error": f"Failed to clone: {e.stderr.decode() if e.stderr else str(e)}", "findings": []})) + sys.exit(1) + + os.chdir(clone_dir) + + # Read PR diff from file written by server.py + try: + with open('/app/pr.diff', 'r', encoding='utf-8') as f: + pr_diff = f.read() + except Exception as e: + print(f"Warning: Could not read PR diff ({e})", file=sys.stderr) + pr_diff = "" + + # Run the agentic review + print("Running agent...", file=sys.stderr) + client = Anthropic(api_key=anthropic_api_key) + + try: + final_response = run_agent(client, pr_diff, max_tool_calls) + + if not final_response: + raise ValueError("No response from agent") + + # Strip markdown fences if present + cleaned = final_response.strip() + if cleaned.startswith('```'): + lines = cleaned.split('\n')[1:] + if lines and lines[-1].strip() == '```': + lines = lines[:-1] + cleaned = '\n'.join(lines).strip() + + output = json.loads(cleaned) + if not isinstance(output.get('findings'), list): + output['findings'] = [] + output['findings'] = output['findings'][:3] + + print(json.dumps(output)) + sys.exit(0) + + except json.JSONDecodeError as e: + print(json.dumps({"error": f"Invalid JSON from agent: {str(e)}", "findings": []})) + sys.exit(1) + except Exception as e: + print(json.dumps({"error": f"Agent failed: {str(e)}", "findings": []})) + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/requirements.txt b/requirements.txt index 137a9b2..31f88f1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -openai +e2b>=1.0.0 +anthropic>=0.25.0 requests python-dotenv fastapi diff --git a/review.py b/review.py index 50d475f..a7a3b93 100644 --- a/review.py +++ b/review.py @@ -40,6 +40,43 @@ # Helper Functions # ------------------------- +def clean_and_parse_json(text: str): + """Clean LLM response and parse JSON, removing markdown fences and extracting the first JSON object.""" + if not text: + raise ValueError("Empty response") + + cleaned = text.strip() + + # Remove markdown code fences (```json ... ``` or ``` ... ```) + if cleaned.startswith("```"): + lines = cleaned.split('\n') + # Skip first line if it's a fence + if lines[0].strip().startswith("```"): + lines = lines[1:] + # Remove last line if it's a closing fence + if lines and lines[-1].strip() == "```": + lines = lines[:-1] + cleaned = '\n'.join(lines).strip() + + # Try direct parse first + try: + return json.loads(cleaned) + except json.JSONDecodeError: + pass # Continue to brace extraction + + # Find the first '{' and last '}' to extract a JSON object + start = cleaned.find('{') + end = cleaned.rfind('}') + if start != -1 and end != -1 and end > start: + candidate = cleaned[start:end+1] + try: + return json.loads(candidate) + except json.JSONDecodeError: + pass + + # If we get here, parsing failed + raise ValueError(f"Could not extract valid JSON from response (first 200 chars): {text[:200]}...") + def should_skip_file(filename): """Check if a file should be skipped based on extension or directory.""" # Check extension @@ -189,6 +226,7 @@ def process_review(repo: str, pr_number: int, installation_id: int): "comments": [ {{ "line": , + "severity": "", "message": "" }} ] @@ -197,6 +235,8 @@ def process_review(repo: str, pr_number: int, installation_id: int): If there are no issues, return: {{ "comments": [] }} + +IMPORTANT: Return ONLY valid JSON. Do not include any other text, explanations, or markdown formatting (such as ```json code fences```). """ try: @@ -210,18 +250,122 @@ def process_review(repo: str, pr_number: int, installation_id: int): ) result = response.choices[0].message.content - data = json.loads(result) + try: + data = clean_and_parse_json(result) + except Exception as e: + print(f"Failed to parse LLM response for {filename}: {e}") + continue for c in data["comments"]: comments.append({ "path": filename, "line": c["line"], + "severity": c.get("severity", "medium"), "body": c["message"] }) except Exception as e: print(f"LLM review failed for {filename}: {e}") + # Second pass: Consolidate and prioritize all comments (max 5 total) + all_file_comments = comments # Save all collected comments + print(f"\nCollected {len(all_file_comments)} potential issues. Consolidating to max {MAX_COMMENTS_PER_PR}...") + + comments = [] + if all_file_comments: + # Build a summary of all issues for prioritization + issues_summary = "" + for idx, comment in enumerate(all_file_comments, 1): + issues_summary += f"{idx}. [{comment['severity'].upper()}] {comment['path']}:{comment['line']} - {comment['body'][:100]}...\n" + + consolidation_prompt = f""" +You are a senior software engineer conducting a final PR review. + +The following potential issues were identified across all files: + +{issues_summary} + +Select at most {MAX_COMMENTS_PER_PR} of the MOST CRITICAL issues to actually comment on. + +IMPORTANT RULES: +- Prioritize critical and high severity issues first +- Skip low-severity issues (style, nitpicks) unless there's something really important +- If multiple issues are related, consider if they can be combined into a single comment +- Be conservative - it's better to have fewer, more impactful comments than many minor ones +- **You MUST include at least one comment (either a selected issue or a summary comment), even if the code looks perfect.** If no significant issues are found, provide a positive or neutral summary comment like "No significant issues found. The changes look good." + +Return JSON in this format: + +{{ + "selected_issues": [ + {{ + "original_index": , + "comment": {{ + "line": , + "message": "" + }} + }} + ], + "summary_comment": "" +}} + +Do NOT return empty strings. Ensure either selected_issues contains at least one item OR summary_comment is non-empty. + +IMPORTANT: Return ONLY valid JSON. Do not include any other text, explanations, or markdown formatting. +""" + + try: + response = client.chat.completions.create( + model="gpt-4.1-mini", + messages=[ + {"role": "system", "content": "You are a senior software engineer with extreme technical expertise."}, + {"role": "user", "content": consolidation_prompt} + ], + temperature=0 + ) + + result = response.choices[0].message.content + try: + data = clean_and_parse_json(result) + except Exception as e: + raise ValueError(f"Consolidation JSON parsing failed: {e}") + + # Add selected specific comments + for selected in data["selected_issues"][:MAX_COMMENTS_PER_PR]: + idx = selected["original_index"] - 1 + if 0 <= idx < len(all_file_comments): + original = all_file_comments[idx] + comments.append({ + "path": original["path"], + "line": selected["comment"]["line"], + "body": selected["comment"]["message"] + }) + + # Optionally add summary comment if present and we have room + if data.get("summary_comment") and len(comments) < MAX_COMMENTS_PER_PR: + # Summary comments are posted as general PR comments (no line number) + comments.append({ + "path": None, # Indicates general PR comment + "line": None, + "body": data["summary_comment"] + }) + + except Exception as e: + print(f"Consolidation failed, falling back to all comments: {e}") + # Fallback: just use all comments but respect the limit + comments = [ + {"path": c["path"], "line": c["line"], "body": c["body"]} + for c in all_file_comments[:MAX_COMMENTS_PER_PR] + ] + + # Ensure at least one comment is posted, even if no issues were found + if len(comments) == 0: + comments.append({ + "path": None, + "line": None, + "body": "Review complete: No significant issues found. The changes look good." + }) + # ------------------------- # Get commit SHA # ------------------------- diff --git a/server.py b/server.py index 4d85de4..4f6c9b8 100644 --- a/server.py +++ b/server.py @@ -3,12 +3,18 @@ import hashlib import os import json +import threading +import requests +from e2b import Sandbox from github_app import GitHubAppAuth from review import process_review app = FastAPI(title="PR Review Bot") WEBHOOK_SECRET = os.environ.get('GITHUB_WEBHOOK_SECRET') +# Semaphore to limit concurrent E2B sandboxes (max 3) +e2b_semaphore = threading.Semaphore(3) + def verify_signature(request_body: bytes, signature: str) -> bool: """Verify webhook signature from GitHub.""" if not signature or not WEBHOOK_SECRET: @@ -22,6 +28,201 @@ def verify_signature(request_body: bytes, signature: str) -> bool: return hmac.compare_digest(signature, expected) +def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): + """ + Run PR review using E2B sandbox and Anthropic Agent SDK. + Falls back to process_review if E2B fails. + """ + print(f"[E2B] Starting review for PR #{pr_number} in {repo}") + + # Acquire semaphore to limit concurrent sandboxes + acquired = False + try: + acquired = e2b_semaphore.acquire(blocking=True, timeout=60) + if not acquired: + print("[E2B] Timeout acquiring semaphore, falling back to local review") + process_review(repo, pr_number, installation_id) + return + except Exception as e: + print(f"[E2B] Semaphore error: {e}, falling back to local review") + process_review(repo, pr_number, installation_id) + return + + sandbox = None + try: + # Get GitHub App auth token + auth = GitHubAppAuth( + app_id=os.environ["GITHUB_APP_ID"], + private_key_path=os.environ["GITHUB_PRIVATE_KEY_PATH"], + installation_id=installation_id + ) + token = auth.get_installation_token() + + # Setup headers with installation token + headers = { + "Authorization": f"token {token}", + "Accept": "application/vnd.github+json" + } + + # Fetch PR data to get diff and commit SHA + pr_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}" + try: + pr_response = requests.get(pr_url, headers=headers) + pr_response.raise_for_status() + pr_data = pr_response.json() + commit_sha = pr_data["head"]["sha"] + branch_name = pr_data["head"]["ref"] + except Exception as e: + print(f"[E2B] Failed to fetch PR data: {e}, falling back to local review") + process_review(repo, pr_number, installation_id) + return + + # Get PR diff (unified diff format) + diff_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/files" + try: + diff_response = requests.get(diff_url, headers=headers) + diff_response.raise_for_status() + files_data = diff_response.json() + + # Build unified diff from files data + diff_parts = [] + for file in files_data: + if 'patch' in file: + diff_parts.append(f"--- a/{file['filename']}") + diff_parts.append(f"+++ b/{file['filename']}") + diff_parts.append(file['patch']) + pr_diff = '\n'.join(diff_parts) + except Exception as e: + print(f"[E2B] Failed to fetch PR diff: {e}, falling back to local review") + process_review(repo, pr_number, installation_id) + return + + # Create E2B sandbox + print("[E2B] Creating sandbox...") + e2b_api_key = os.environ.get("E2B_API_KEY") + if not e2b_api_key: + raise ValueError("E2B_API_KEY not set") + + sandbox = Sandbox(api_key=e2b_api_key, template="claude", timeout=600)#id is wunszvjeuyrdgrt0z6o9 + print("[E2B] Sandbox created") + + # Upload agent.py to sandbox + with open('/app/agent.py', 'r') as f: + agent_code = f.read() + sandbox.files.write('/app/agent.py', agent_code) + print("[E2B] agent.py uploaded") + + # Write PR diff to a file to avoid env var size limits for large PRs + sandbox.files.write('/app/pr.diff', pr_diff) + print("[E2B] PR diff written to /app/pr.diff") + + # Run the agent with env vars passed directly to the command + print("[E2B] Starting agent process...") + agent_envs = { + 'REPO': repo, + 'COMMIT_SHA': commit_sha, + 'BRANCH': branch_name, + 'GITHUB_TOKEN': token, + 'ANTHROPIC_API_KEY': os.environ.get("ANTHROPIC_API_KEY", ""), + 'MAX_TOOL_CALLS': os.environ.get("MAX_TOOL_CALLS", "10"), + } + + stdout_chunks = [] + sandbox.commands.run( + "python /app/agent.py", + envs=agent_envs, + timeout=580, + on_stdout=lambda data: stdout_chunks.append(data), + on_stderr=lambda data: print(f"[Agent] {data}", end='', flush=True) + ) + stdout = ''.join(stdout_chunks) + + print("[E2B] Agent process completed") + + # Parse the JSON output + try: + output = json.loads(stdout) + findings = output.get('findings', []) + + # Cap at 3 findings + findings = findings[:3] + print(f"[E2B] Review completed: {len(findings)} findings") + + # Build set of files in this PR for validation of inline comments + pr_files = {f['filename'] for f in files_data} + + for finding in findings: + file_path = finding.get('file') + line = finding.get('line') + severity = finding.get('severity', 'medium') + message = finding.get('message', '') + + if not message: + continue + + comment_body = f"**[{severity.upper()}]** {message}" + + if file_path and line: + # Inline comment — validate file is in the PR diff + if file_path not in pr_files: + print(f"[E2B] Skipping inline finding: {file_path} not in PR diff") + continue + comment_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments" + payload = { + "body": comment_body, + "commit_id": commit_sha, + "path": file_path, + "line": line + } + label = f"{file_path}:{line}" + else: + # Overall assessment — post as a general PR comment + comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments" + payload = {"body": comment_body} + label = "overall" + + try: + response = requests.post(comment_url, headers=headers, json=payload) + response.raise_for_status() + print(f"[E2B] Posted comment ({label})") + except Exception as e: + print(f"[E2B] Failed to post comment ({label}): {e}") + + if not findings: + comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments" + payload = {"body": "Review complete: No significant issues found. The changes look good."} + try: + response = requests.post(comment_url, headers=headers, json=payload) + response.raise_for_status() + print("[E2B] Posted summary comment") + except Exception as e: + print(f"[E2B] Failed to post summary comment: {e}") + + except json.JSONDecodeError as e: + print(f"[E2B] Failed to parse agent output: {e}") + print(f"[E2B] Agent output (first 500 chars): {stdout[:500]}") + print("[E2B] Falling back to local review") + process_review(repo, pr_number, installation_id) + + except Exception as e: + print(f"[E2B] Error during review: {e}") + print("[E2B] Falling back to local review") + process_review(repo, pr_number, installation_id) + + finally: + # Ensure sandbox is closed + if sandbox: + try: + sandbox.close() + print("[E2B] Sandbox closed") + except Exception as e: + print(f"[E2B] Error closing sandbox: {e}") + + # Release semaphore + if acquired: + e2b_semaphore.release() + print("[E2B] Review task completed") + @app.post('/webhook') async def webhook_handler(request: Request, background_tasks: BackgroundTasks): # Verify signature @@ -40,9 +241,9 @@ async def webhook_handler(request: Request, background_tasks: BackgroundTasks): repo = event['pull_request']['base']['repo']['full_name'] installation_id = event['installation']['id'] - # Run review in background (don't block webhook response) + # Run review in E2B sandbox in background (don't block webhook response) background_tasks.add_task( - process_review, + run_review_in_e2b, repo=repo, pr_number=pr_number, installation_id=installation_id From 96db2f23b666f822c920e0b71b73c8a92943120b Mon Sep 17 00:00:00 2001 From: vineeshah Date: Wed, 8 Apr 2026 13:57:04 -0700 Subject: [PATCH 2/5] fix: syntax --- requirements.txt | 1 + server.py | 20 +++++++++++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 31f88f1..2cf69ef 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ e2b>=1.0.0 anthropic>=0.25.0 +openai requests python-dotenv fastapi diff --git a/server.py b/server.py index 4f6c9b8..7742e63 100644 --- a/server.py +++ b/server.py @@ -99,11 +99,10 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): # Create E2B sandbox print("[E2B] Creating sandbox...") - e2b_api_key = os.environ.get("E2B_API_KEY") - if not e2b_api_key: + if not os.environ.get("E2B_API_KEY"): raise ValueError("E2B_API_KEY not set") - sandbox = Sandbox(api_key=e2b_api_key, template="claude", timeout=600)#id is wunszvjeuyrdgrt0z6o9 + sandbox = Sandbox(template="claude", timeout=600) print("[E2B] Sandbox created") # Upload agent.py to sandbox @@ -185,8 +184,19 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): response = requests.post(comment_url, headers=headers, json=payload) response.raise_for_status() print(f"[E2B] Posted comment ({label})") - except Exception as e: - print(f"[E2B] Failed to post comment ({label}): {e}") + except requests.HTTPError as e: + if e.response.status_code == 422 and file_path and line: + # Line not in diff — fall back to general issue comment + print(f"[E2B] Inline comment rejected (line not in diff), posting as issue comment ({label})") + fallback_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments" + fallback_payload = {"body": f"**[{severity.upper()}]** `{file_path}` (line {line}): {message}"} + try: + requests.post(fallback_url, headers=headers, json=fallback_payload).raise_for_status() + print(f"[E2B] Posted fallback comment ({label})") + except Exception as e2: + print(f"[E2B] Failed to post fallback comment: {e2}") + else: + print(f"[E2B] Failed to post comment ({label}): {e}") if not findings: comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments" From eccf29b0d5aca4047dc2dcd7096942445a8d0401 Mon Sep 17 00:00:00 2001 From: vineeshah Date: Wed, 8 Apr 2026 17:00:45 -0700 Subject: [PATCH 3/5] fixes suggested by the bot --- agent.py | 13 +++++++------ server.py | 49 +++++++++++++++++++++---------------------------- 2 files changed, 28 insertions(+), 34 deletions(-) diff --git a/agent.py b/agent.py index e5bbdcc..12cfd8e 100644 --- a/agent.py +++ b/agent.py @@ -84,7 +84,7 @@ def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str: while True: response = client.messages.create( - model="claude-haiku-4-5", + model="claude-haiku-4-5-20251001", max_tokens=4096, system=SYSTEM_PROMPT, tools=TOOLS, @@ -94,8 +94,9 @@ def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str: messages.append({"role": "assistant", "content": response.content}) if response.stop_reason == "end_turn": + print(f"end_turn content: {response.content}", file=sys.stderr) for block in response.content: - if hasattr(block, 'text'): + if hasattr(block, 'text') and block.text.strip(): return block.text return "" @@ -116,14 +117,16 @@ def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str: if tool_call_count >= max_tool_calls: print(f"Tool call limit ({max_tool_calls}) reached, requesting final answer.", file=sys.stderr) + messages.append({"role": "user", "content": "You've used the maximum number of tool calls. Based on everything you've seen, provide your final review as JSON now."}) response = client.messages.create( - model="claude-haiku-4-5", + model="claude-haiku-4-5-20251001", max_tokens=4096, system=SYSTEM_PROMPT, messages=messages ) + print(f"Final response stop_reason={response.stop_reason} content={response.content}", file=sys.stderr) for block in response.content: - if hasattr(block, 'text'): + if hasattr(block, 'text') and block.text.strip(): return block.text return "" else: @@ -199,10 +202,8 @@ def main(): except json.JSONDecodeError as e: print(json.dumps({"error": f"Invalid JSON from agent: {str(e)}", "findings": []})) - sys.exit(1) except Exception as e: print(json.dumps({"error": f"Agent failed: {str(e)}", "findings": []})) - sys.exit(1) if __name__ == '__main__': diff --git a/server.py b/server.py index 7742e63..17ef17a 100644 --- a/server.py +++ b/server.py @@ -7,7 +7,6 @@ import requests from e2b import Sandbox from github_app import GitHubAppAuth -from review import process_review app = FastAPI(title="PR Review Bot") WEBHOOK_SECRET = os.environ.get('GITHUB_WEBHOOK_SECRET') @@ -31,7 +30,7 @@ def verify_signature(request_body: bytes, signature: str) -> bool: def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): """ Run PR review using E2B sandbox and Anthropic Agent SDK. - Falls back to process_review if E2B fails. + Runs PR review using E2B sandbox and Anthropic agent. """ print(f"[E2B] Starting review for PR #{pr_number} in {repo}") @@ -40,15 +39,14 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): try: acquired = e2b_semaphore.acquire(blocking=True, timeout=60) if not acquired: - print("[E2B] Timeout acquiring semaphore, falling back to local review") - process_review(repo, pr_number, installation_id) + print("[E2B] Timeout acquiring semaphore, skipping review") return except Exception as e: - print(f"[E2B] Semaphore error: {e}, falling back to local review") - process_review(repo, pr_number, installation_id) + print(f"[E2B] Semaphore error: {e}") return sandbox = None + stdout_chunks = [] try: # Get GitHub App auth token auth = GitHubAppAuth( @@ -73,8 +71,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): commit_sha = pr_data["head"]["sha"] branch_name = pr_data["head"]["ref"] except Exception as e: - print(f"[E2B] Failed to fetch PR data: {e}, falling back to local review") - process_review(repo, pr_number, installation_id) + print(f"[E2B] Failed to fetch PR data: {e}") return # Get PR diff (unified diff format) @@ -93,8 +90,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): diff_parts.append(file['patch']) pr_diff = '\n'.join(diff_parts) except Exception as e: - print(f"[E2B] Failed to fetch PR diff: {e}, falling back to local review") - process_review(repo, pr_number, installation_id) + print(f"[E2B] Failed to fetch PR diff: {e}") return # Create E2B sandbox @@ -102,7 +98,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): if not os.environ.get("E2B_API_KEY"): raise ValueError("E2B_API_KEY not set") - sandbox = Sandbox(template="claude", timeout=600) + sandbox = Sandbox.create(template="claude", timeout=300) print("[E2B] Sandbox created") # Upload agent.py to sandbox @@ -115,6 +111,11 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): sandbox.files.write('/app/pr.diff', pr_diff) print("[E2B] PR diff written to /app/pr.diff") + # Install dependencies in the sandbox + print("[E2B] Installing dependencies...") + sandbox.commands.run("pip3 install anthropic -q --break-system-packages", timeout=120) + print("[E2B] Dependencies installed") + # Run the agent with env vars passed directly to the command print("[E2B] Starting agent process...") agent_envs = { @@ -128,9 +129,9 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): stdout_chunks = [] sandbox.commands.run( - "python /app/agent.py", + "python3 /app/agent.py", envs=agent_envs, - timeout=580, + timeout=290, on_stdout=lambda data: stdout_chunks.append(data), on_stderr=lambda data: print(f"[Agent] {data}", end='', flush=True) ) @@ -139,6 +140,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): print("[E2B] Agent process completed") # Parse the JSON output + print(f"[E2B] Agent stdout: {stdout[:500]}") try: output = json.loads(stdout) findings = output.get('findings', []) @@ -199,34 +201,25 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): print(f"[E2B] Failed to post comment ({label}): {e}") if not findings: - comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments" - payload = {"body": "Review complete: No significant issues found. The changes look good."} - try: - response = requests.post(comment_url, headers=headers, json=payload) - response.raise_for_status() - print("[E2B] Posted summary comment") - except Exception as e: - print(f"[E2B] Failed to post summary comment: {e}") + print("[E2B] No findings, skipping comment") except json.JSONDecodeError as e: print(f"[E2B] Failed to parse agent output: {e}") print(f"[E2B] Agent output (first 500 chars): {stdout[:500]}") - print("[E2B] Falling back to local review") - process_review(repo, pr_number, installation_id) except Exception as e: print(f"[E2B] Error during review: {e}") - print("[E2B] Falling back to local review") - process_review(repo, pr_number, installation_id) + if stdout_chunks: + print(f"[E2B] Agent stdout: {''.join(stdout_chunks)}") finally: # Ensure sandbox is closed if sandbox: try: - sandbox.close() - print("[E2B] Sandbox closed") + sandbox.kill() + print("[E2B] Sandbox killed") except Exception as e: - print(f"[E2B] Error closing sandbox: {e}") + print(f"[E2B] Error killing sandbox: {e}") # Release semaphore if acquired: From 2ee04eab6eef5335f41d3c87f5b78c7e1e5fbcb4 Mon Sep 17 00:00:00 2001 From: vineeshah Date: Wed, 8 Apr 2026 17:38:27 -0700 Subject: [PATCH 4/5] vercel ai sdk --- agent.mjs | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ server.py | 23 ++++---- 2 files changed, 170 insertions(+), 9 deletions(-) create mode 100644 agent.mjs diff --git a/agent.mjs b/agent.mjs new file mode 100644 index 0000000..2c746ef --- /dev/null +++ b/agent.mjs @@ -0,0 +1,156 @@ +import { generateText, tool } from 'ai'; +import { createAnthropic } from '@ai-sdk/anthropic'; +import { createOpenAI } from '@ai-sdk/openai'; +import { createGoogleGenerativeAI } from '@ai-sdk/google'; +import { z } from 'zod'; +import { spawnSync } from 'child_process'; +import { readFileSync } from 'fs'; + +function runBash(command, timeout = 30000) { + const safeEnv = { + PATH: process.env.PATH || '/usr/bin:/bin:/usr/local/bin', + HOME: process.env.HOME || '/root', + LANG: process.env.LANG || 'en_US.UTF-8', + }; + + const result = spawnSync('sh', ['-c', command], { + cwd: '/tmp/repo', + env: safeEnv, + timeout, + encoding: 'utf8', + }); + + if (result.error?.code === 'ETIMEDOUT') return `ERROR: Command timed out after ${timeout / 1000}s`; + if (result.error) return `ERROR: ${result.error.message}`; + + let output = result.stdout || ''; + if (result.stderr) output += `\n[stderr]: ${result.stderr}`; + if (!output.trim()) return `[exit code ${result.status}, no output]`; + if (output.length > 8000) output = output.slice(0, 8000) + `\n...[truncated, ${output.length} total chars]`; + return output; +} + +const SYSTEM_PROMPT = `You are a senior software engineer reviewing a pull request. + +The repository is already cloned and your working directory is the repo root. You have a bash tool with full shell access — use it however you see fit to understand the changes and their impact. + +Return at most 3 findings as JSON — no other text. Each finding can be an inline comment (specific file + line) or an overall assessment (no file/line). + +{ + "findings": [ + { + "file": "path/to/file.py", + "line": 123, + "severity": "critical" | "high" | "medium" | "low", + "message": "..." + }, + { + "severity": "medium", + "message": "Overall: ..." + } + ] +} + +If there are no significant issues, return {"findings": []}.`; + +function getModel(modelId, anthropicApiKey, openaiApiKey, googleApiKey) { + if (modelId.startsWith('gpt-') || modelId.startsWith('o1') || modelId.startsWith('o3')) { + return createOpenAI({ apiKey: openaiApiKey })(modelId); + } + if (modelId.startsWith('gemini-')) { + return createGoogleGenerativeAI({ apiKey: googleApiKey })(modelId); + } + return createAnthropic({ apiKey: anthropicApiKey })(modelId); +} + +async function main() { + const repoName = process.env.REPO; + const branchName = process.env.BRANCH || 'main'; + const anthropicApiKey = process.env.ANTHROPIC_API_KEY; + const openaiApiKey = process.env.OPENAI_API_KEY || ''; + const googleApiKey = process.env.GOOGLE_API_KEY || ''; + const githubToken = process.env.GITHUB_TOKEN; + const maxSteps = parseInt(process.env.MAX_TOOL_CALLS || '10'); + const modelId = process.env.MODEL || 'claude-haiku-4-5-20251001'; + + if (!repoName || !anthropicApiKey || !githubToken) { + console.log(JSON.stringify({ error: 'Missing required environment variables', findings: [] })); + process.exit(0); + } + + // Clone the repository + process.stderr.write('Cloning repository...\n'); + const cloneDir = '/tmp/repo'; + const cloneUrl = `https://${githubToken}@github.com/${repoName}.git`; + + spawnSync('rm', ['-rf', cloneDir]); + const cloneResult = spawnSync( + 'git', + ['clone', '--depth=1', '--branch', branchName, cloneUrl, cloneDir], + { encoding: 'utf8', env: { ...process.env, GIT_TERMINAL_PROMPT: '0' } } + ); + + if (cloneResult.status !== 0) { + console.log(JSON.stringify({ error: `Failed to clone: ${cloneResult.stderr || 'unknown error'}`, findings: [] })); + process.exit(0); + } + process.stderr.write(`Cloned ${repoName} branch ${branchName} (shallow)\n`); + + // Strip token from git config to prevent exfiltration via bash tool + spawnSync('git', ['remote', 'set-url', 'origin', `https://github.com/${repoName}.git`], { + cwd: cloneDir, encoding: 'utf8', + }); + + // Read PR diff + let prDiff = ''; + try { + prDiff = readFileSync('/app/pr.diff', 'utf8'); + } catch (e) { + process.stderr.write(`Warning: Could not read PR diff (${e.message})\n`); + } + + process.stderr.write('Running agent...\n'); + + try { + const model = getModel(modelId, anthropicApiKey, openaiApiKey, googleApiKey); + let stepCount = 0; + + const { text } = await generateText({ + model, + system: SYSTEM_PROMPT, + prompt: `Please review this pull request:\n\n${prDiff}`, + maxSteps, + tools: { + bash: tool({ + description: 'Run a shell command in the repository root.', + parameters: z.object({ command: z.string() }), + execute: async ({ command }) => { + stepCount++; + process.stderr.write(`Tool call ${stepCount}/${maxSteps}: bash(${JSON.stringify({ command })})\n`); + return runBash(command); + }, + }), + }, + }); + + // Strip markdown fences if present + let cleaned = text.trim(); + if (cleaned.startsWith('```')) { + const lines = cleaned.split('\n').slice(1); + if (lines.at(-1)?.trim() === '```') lines.pop(); + cleaned = lines.join('\n').trim(); + } + + const output = JSON.parse(cleaned); + if (!Array.isArray(output.findings)) output.findings = []; + output.findings = output.findings.slice(0, 3); + + console.log(JSON.stringify(output)); + process.exit(0); + } catch (e) { + console.log(JSON.stringify({ error: `Agent failed: ${e.message}`, findings: [] })); + process.exit(0); + } +} + +main(); diff --git a/server.py b/server.py index 17ef17a..5e19675 100644 --- a/server.py +++ b/server.py @@ -101,20 +101,22 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): sandbox = Sandbox.create(template="claude", timeout=300) print("[E2B] Sandbox created") - # Upload agent.py to sandbox - with open('/app/agent.py', 'r') as f: + # Upload agent.mjs to sandbox + with open('/app/agent.mjs', 'r') as f: agent_code = f.read() - sandbox.files.write('/app/agent.py', agent_code) - print("[E2B] agent.py uploaded") + sandbox.files.write('/app/agent.mjs', agent_code) + print("[E2B] agent.mjs uploaded") # Write PR diff to a file to avoid env var size limits for large PRs sandbox.files.write('/app/pr.diff', pr_diff) print("[E2B] PR diff written to /app/pr.diff") - # Install dependencies in the sandbox - print("[E2B] Installing dependencies...") - sandbox.commands.run("pip3 install anthropic -q --break-system-packages", timeout=120) - print("[E2B] Dependencies installed") + # Write package.json and install Node dependencies + print("[E2B] Installing Node.js dependencies...") + package_json = '{"type":"module","dependencies":{"ai":"^4.0.0","@ai-sdk/anthropic":"^1.0.0","@ai-sdk/openai":"^1.0.0","@ai-sdk/google":"^1.0.0","zod":"^3.0.0"}}' + sandbox.files.write('/app/package.json', package_json) + sandbox.commands.run("cd /app && npm install -q", timeout=120) + print("[E2B] Node.js dependencies installed") # Run the agent with env vars passed directly to the command print("[E2B] Starting agent process...") @@ -124,12 +126,15 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): 'BRANCH': branch_name, 'GITHUB_TOKEN': token, 'ANTHROPIC_API_KEY': os.environ.get("ANTHROPIC_API_KEY", ""), + 'OPENAI_API_KEY': os.environ.get("OPENAI_API_KEY", ""), + 'GOOGLE_API_KEY': os.environ.get("GOOGLE_API_KEY", ""), 'MAX_TOOL_CALLS': os.environ.get("MAX_TOOL_CALLS", "10"), + 'MODEL': os.environ.get("MODEL", "claude-haiku-4-5-20251001"), } stdout_chunks = [] sandbox.commands.run( - "python3 /app/agent.py", + "node /app/agent.mjs", envs=agent_envs, timeout=290, on_stdout=lambda data: stdout_chunks.append(data), From 2a7df0335741e64a63ad2a1a1d12dad3e108a668 Mon Sep 17 00:00:00 2001 From: vineeshah Date: Sat, 11 Apr 2026 09:47:09 -0700 Subject: [PATCH 5/5] sandbox debugging --- .dockerignore | 2 ++ Dockerfile | 3 +++ agent.mjs | 51 ++++++++++++++++++++++++++++----------------------- server.py | 37 ++++++++++++++++++++++--------------- 4 files changed, 55 insertions(+), 38 deletions(-) create mode 100644 .dockerignore diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..683d6c2 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +private-key.pem +*.pem diff --git a/Dockerfile b/Dockerfile index f1e9e0f..abe3544 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,9 @@ # Use Python 3.11 slim image FROM python:3.11-slim +# Disable Python output buffering so logs appear immediately +ENV PYTHONUNBUFFERED=1 + # Set working directory WORKDIR /app diff --git a/agent.mjs b/agent.mjs index 2c746ef..1c5f1bc 100644 --- a/agent.mjs +++ b/agent.mjs @@ -1,7 +1,7 @@ -import { generateText, tool } from 'ai'; -import { createAnthropic } from '@ai-sdk/anthropic'; -import { createOpenAI } from '@ai-sdk/openai'; -import { createGoogleGenerativeAI } from '@ai-sdk/google'; +import { generateText, tool, stepCountIs } from 'ai'; +import { anthropic } from '@ai-sdk/anthropic'; +import { openai } from '@ai-sdk/openai'; +import { google } from '@ai-sdk/google'; import { z } from 'zod'; import { spawnSync } from 'child_process'; import { readFileSync } from 'fs'; @@ -30,11 +30,20 @@ function runBash(command, timeout = 30000) { return output; } -const SYSTEM_PROMPT = `You are a senior software engineer reviewing a pull request. +const SYSTEM_PROMPT = `You are a senior software engineer doing a thorough review of a pull request. -The repository is already cloned and your working directory is the repo root. You have a bash tool with full shell access — use it however you see fit to understand the changes and their impact. +The repository is already cloned at the repo root. You have full bash access — use it liberally, there is no cost to running many commands. -Return at most 3 findings as JSON — no other text. Each finding can be an inline comment (specific file + line) or an overall assessment (no file/line). +You MUST do all of the following before forming any conclusions: +1. Read each changed file in full, not just the diff +2. Find every caller and usage of any modified function, class, or symbol across the entire repo +3. Read related files — tests, configs, dependent modules, anything that could be affected +4. Check for edge cases: error handling, concurrency, security, null/undefined, type mismatches +5. Run any additional commands needed to fully understand the impact + +Use as many bash calls as you need. Do not cut corners. + +Only after thorough exploration, return at most 3 findings as JSON — no other text. Focus on real bugs, security issues, or broken logic. Skip style nits. { "findings": [ @@ -53,27 +62,24 @@ Return at most 3 findings as JSON — no other text. Each finding can be an inli If there are no significant issues, return {"findings": []}.`; -function getModel(modelId, anthropicApiKey, openaiApiKey, googleApiKey) { - if (modelId.startsWith('gpt-') || modelId.startsWith('o1') || modelId.startsWith('o3')) { - return createOpenAI({ apiKey: openaiApiKey })(modelId); +function getModel(modelId) { + if (modelId.startsWith('gpt-') || modelId.startsWith('o1') || modelId.startsWith('o3') || modelId.startsWith('o4')) { + return openai(modelId); } if (modelId.startsWith('gemini-')) { - return createGoogleGenerativeAI({ apiKey: googleApiKey })(modelId); + return google(modelId); } - return createAnthropic({ apiKey: anthropicApiKey })(modelId); + return anthropic(modelId); } async function main() { const repoName = process.env.REPO; const branchName = process.env.BRANCH || 'main'; - const anthropicApiKey = process.env.ANTHROPIC_API_KEY; - const openaiApiKey = process.env.OPENAI_API_KEY || ''; - const googleApiKey = process.env.GOOGLE_API_KEY || ''; const githubToken = process.env.GITHUB_TOKEN; const maxSteps = parseInt(process.env.MAX_TOOL_CALLS || '10'); - const modelId = process.env.MODEL || 'claude-haiku-4-5-20251001'; + const modelId = process.env.MODEL || 'gemini-2.5-flash'; - if (!repoName || !anthropicApiKey || !githubToken) { + if (!repoName || !githubToken) { console.log(JSON.stringify({ error: 'Missing required environment variables', findings: [] })); process.exit(0); } @@ -109,24 +115,23 @@ async function main() { process.stderr.write(`Warning: Could not read PR diff (${e.message})\n`); } - process.stderr.write('Running agent...\n'); + process.stderr.write(`Running agent with ${modelId}...\n`); try { - const model = getModel(modelId, anthropicApiKey, openaiApiKey, googleApiKey); let stepCount = 0; const { text } = await generateText({ - model, + model: getModel(modelId), system: SYSTEM_PROMPT, prompt: `Please review this pull request:\n\n${prDiff}`, - maxSteps, + stopWhen: stepCountIs(maxSteps), tools: { bash: tool({ description: 'Run a shell command in the repository root.', - parameters: z.object({ command: z.string() }), + inputSchema: z.object({ command: z.string() }), execute: async ({ command }) => { stepCount++; - process.stderr.write(`Tool call ${stepCount}/${maxSteps}: bash(${JSON.stringify({ command })})\n`); + process.stderr.write(`Tool call ${stepCount}: bash(${JSON.stringify({ command })})\n`); return runBash(command); }, }), diff --git a/server.py b/server.py index 5e19675..698cf17 100644 --- a/server.py +++ b/server.py @@ -1,4 +1,4 @@ -from fastapi import FastAPI, Request, HTTPException, BackgroundTasks +from fastapi import FastAPI, Request, HTTPException import hmac import hashlib import os @@ -70,6 +70,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): pr_data = pr_response.json() commit_sha = pr_data["head"]["sha"] branch_name = pr_data["head"]["ref"] + head_repo = pr_data["head"]["repo"]["full_name"] except Exception as e: print(f"[E2B] Failed to fetch PR data: {e}") return @@ -113,7 +114,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): # Write package.json and install Node dependencies print("[E2B] Installing Node.js dependencies...") - package_json = '{"type":"module","dependencies":{"ai":"^4.0.0","@ai-sdk/anthropic":"^1.0.0","@ai-sdk/openai":"^1.0.0","@ai-sdk/google":"^1.0.0","zod":"^3.0.0"}}' + package_json = '{"type":"module","dependencies":{"ai":"^6.0.0","@ai-sdk/anthropic":"^3.0.0","@ai-sdk/openai":"^3.0.0","@ai-sdk/google":"^3.0.0","zod":"^3.23.0"}}' sandbox.files.write('/app/package.json', package_json) sandbox.commands.run("cd /app && npm install -q", timeout=120) print("[E2B] Node.js dependencies installed") @@ -121,13 +122,13 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): # Run the agent with env vars passed directly to the command print("[E2B] Starting agent process...") agent_envs = { - 'REPO': repo, + 'REPO': head_repo, 'COMMIT_SHA': commit_sha, 'BRANCH': branch_name, 'GITHUB_TOKEN': token, 'ANTHROPIC_API_KEY': os.environ.get("ANTHROPIC_API_KEY", ""), 'OPENAI_API_KEY': os.environ.get("OPENAI_API_KEY", ""), - 'GOOGLE_API_KEY': os.environ.get("GOOGLE_API_KEY", ""), + 'GOOGLE_GENERATIVE_AI_API_KEY': os.environ.get("GOOGLE_GENERATIVE_AI_API_KEY", ""), 'MAX_TOOL_CALLS': os.environ.get("MAX_TOOL_CALLS", "10"), 'MODEL': os.environ.get("MODEL", "claude-haiku-4-5-20251001"), } @@ -232,7 +233,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int): print("[E2B] Review task completed") @app.post('/webhook') -async def webhook_handler(request: Request, background_tasks: BackgroundTasks): +async def webhook_handler(request: Request): # Verify signature signature = request.headers.get('X-Hub-Signature-256') body = await request.body() @@ -242,20 +243,26 @@ async def webhook_handler(request: Request, background_tasks: BackgroundTasks): event = json.loads(body) action = event.get('action') + event_type = request.headers.get('X-GitHub-Event', 'unknown') + print(f"[Webhook] event={event_type} action={action}") # Only process PR open/update events if action in ['opened', 'reopened', 'synchronize']: - pr_number = event['pull_request']['number'] - repo = event['pull_request']['base']['repo']['full_name'] - installation_id = event['installation']['id'] - - # Run review in E2B sandbox in background (don't block webhook response) - background_tasks.add_task( - run_review_in_e2b, - repo=repo, - pr_number=pr_number, - installation_id=installation_id + try: + pr_number = event['pull_request']['number'] + repo = event['pull_request']['base']['repo']['full_name'] + installation_id = event['installation']['id'] + except KeyError as e: + print(f"[Webhook] Missing key in payload: {e} — keys: {list(event.keys())}") + return {'status': 'ok'} + + print(f"[Webhook] Scheduling review for PR #{pr_number} in {repo} (installation={installation_id})", flush=True) + thread = threading.Thread( + target=run_review_in_e2b, + kwargs={'repo': repo, 'pr_number': pr_number, 'installation_id': installation_id}, + daemon=True ) + thread.start() # Respond quickly (GitHub expects <30 sec) return {'status': 'ok'}