From b9f02ec44bb0aa6b47c55126ff21d68b2c08f2c8 Mon Sep 17 00:00:00 2001
From: vineeshah <vineeshah2006@gmail.com>
Date: Wed, 8 Apr 2026 12:55:25 -0700
Subject: [PATCH 1/5] feat: run Anthropic tool-use review agent in E2B sandbox

---
 agent.py         | 209 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |   3 +-
 review.py        | 146 ++++++++++++++++++++++++++++++++-
 server.py        | 205 +++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 559 insertions(+), 4 deletions(-)
 create mode 100644 agent.py

diff --git a/agent.py b/agent.py
new file mode 100644
index 0000000..e5bbdcc
--- /dev/null
+++ b/agent.py
@@ -0,0 +1,209 @@
+import os
+import subprocess
+import json
+import sys
+from anthropic import Anthropic
+
+
+def run_bash(command: str, timeout: int = 30) -> str:
+    """Run a shell command in the cloned repo directory with secrets stripped from env."""
+    try:
+        # Minimal env — no secrets accessible to shell commands (prevents prompt injection exfiltration)
+        safe_env = {
+            "PATH": os.environ.get("PATH", "/usr/bin:/bin:/usr/local/bin"),
+            "HOME": os.environ.get("HOME", "/root"),
+            "LANG": os.environ.get("LANG", "en_US.UTF-8"),
+        }
+        result = subprocess.run(
+            command, shell=True, capture_output=True,
+            text=True, timeout=timeout, cwd='/tmp/repo',
+            env=safe_env
+        )
+        output = result.stdout
+        if result.stderr:
+            output += f"\n[stderr]: {result.stderr}"
+        if not output.strip():
+            return f"[exit code {result.returncode}, no output]"
+        if len(output) > 8000:
+            output = output[:8000] + f"\n...[truncated, {len(output)} total chars]"
+        return output
+    except subprocess.TimeoutExpired:
+        return f"ERROR: Command timed out after {timeout}s"
+    except Exception as e:
+        return f"ERROR: {str(e)}"
+
+
+TOOLS = [
+    {
+        "name": "bash",
+        "description": "Run a shell command in the repository root.",
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "command": {"type": "string", "description": "The shell command to run"}
+            },
+            "required": ["command"]
+        }
+    }
+]
+
+SYSTEM_PROMPT = """You are a senior software engineer reviewing a pull request.
+
+The repository is already cloned and your working directory is the repo root. You have a bash tool with full shell access — use it however you see fit to understand the changes and their impact.
+
+Return at most 3 findings as JSON — no other text. Each finding can be an inline comment (specific file + line) or an overall assessment (no file/line). Use whichever makes more sense for each issue.
+
+{
+  "findings": [
+    {
+      "file": "path/to/file.py",
+      "line": 123,
+      "severity": "critical" | "high" | "medium" | "low",
+      "message": "..."
+    },
+    {
+      "severity": "medium",
+      "message": "Overall: ..."
+    }
+  ]
+}
+
+If there are no significant issues, return {"findings": []}.
+"""
+
+
+def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str:
+    """Run the agentic loop using Anthropic's tool use API."""
+    messages = [
+        {
+            "role": "user",
+            "content": f"Please review this pull request:\n\n{pr_diff}"
+        }
+    ]
+    tool_call_count = 0
+
+    while True:
+        response = client.messages.create(
+            model="claude-haiku-4-5",
+            max_tokens=4096,
+            system=SYSTEM_PROMPT,
+            tools=TOOLS,
+            messages=messages
+        )
+
+        messages.append({"role": "assistant", "content": response.content})
+
+        if response.stop_reason == "end_turn":
+            for block in response.content:
+                if hasattr(block, 'text'):
+                    return block.text
+            return ""
+
+        if response.stop_reason == "tool_use":
+            tool_results = []
+            for block in response.content:
+                if block.type == "tool_use":
+                    tool_call_count += 1
+                    print(f"Tool call {tool_call_count}/{max_tool_calls}: {block.name}({json.dumps(block.input)})", file=sys.stderr)
+                    result = run_bash(block.input["command"]) if block.name == "bash" else f"Unknown tool: {block.name}"
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": block.id,
+                        "content": result
+                    })
+
+            messages.append({"role": "user", "content": tool_results})
+
+            if tool_call_count >= max_tool_calls:
+                print(f"Tool call limit ({max_tool_calls}) reached, requesting final answer.", file=sys.stderr)
+                response = client.messages.create(
+                    model="claude-haiku-4-5",
+                    max_tokens=4096,
+                    system=SYSTEM_PROMPT,
+                    messages=messages
+                )
+                for block in response.content:
+                    if hasattr(block, 'text'):
+                        return block.text
+                return ""
+        else:
+            break
+
+    return ""
+
+
+def main():
+    repo_name = os.environ.get('REPO')
+    commit_sha = os.environ.get('COMMIT_SHA')
+    branch_name = os.environ.get('BRANCH', 'main')
+    anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')
+    github_token = os.environ.get('GITHUB_TOKEN')
+    max_tool_calls = int(os.environ.get('MAX_TOOL_CALLS', '10'))
+
+    if not all([repo_name, commit_sha, anthropic_api_key, github_token]):
+        print(json.dumps({"error": "Missing required environment variables", "findings": []}))
+        sys.exit(1)
+
+    # Clone the repository at the PR commit
+    print("Cloning repository...", file=sys.stderr)
+    clone_dir = '/tmp/repo'
+    clone_url = f"https://{github_token}@github.com/{repo_name}.git"
+
+    try:
+        subprocess.run(['rm', '-rf', clone_dir], check=True)
+        subprocess.run(
+            ['git', 'clone', '--depth=1', '--branch', branch_name, clone_url, clone_dir],
+            check=True, capture_output=True,
+            env={**os.environ, 'GIT_TERMINAL_PROMPT': '0'}
+        )
+        print(f"Cloned {repo_name} branch {branch_name} (shallow)", file=sys.stderr)
+    except subprocess.CalledProcessError as e:
+        print(json.dumps({"error": f"Failed to clone: {e.stderr.decode() if e.stderr else str(e)}", "findings": []}))
+        sys.exit(1)
+
+    os.chdir(clone_dir)
+
+    # Read PR diff from file written by server.py
+    try:
+        with open('/app/pr.diff', 'r', encoding='utf-8') as f:
+            pr_diff = f.read()
+    except Exception as e:
+        print(f"Warning: Could not read PR diff ({e})", file=sys.stderr)
+        pr_diff = ""
+
+    # Run the agentic review
+    print("Running agent...", file=sys.stderr)
+    client = Anthropic(api_key=anthropic_api_key)
+
+    try:
+        final_response = run_agent(client, pr_diff, max_tool_calls)
+
+        if not final_response:
+            raise ValueError("No response from agent")
+
+        # Strip markdown fences if present
+        cleaned = final_response.strip()
+        if cleaned.startswith('```'):
+            lines = cleaned.split('\n')[1:]
+            if lines and lines[-1].strip() == '```':
+                lines = lines[:-1]
+            cleaned = '\n'.join(lines).strip()
+
+        output = json.loads(cleaned)
+        if not isinstance(output.get('findings'), list):
+            output['findings'] = []
+        output['findings'] = output['findings'][:3]
+
+        print(json.dumps(output))
+        sys.exit(0)
+
+    except json.JSONDecodeError as e:
+        print(json.dumps({"error": f"Invalid JSON from agent: {str(e)}", "findings": []}))
+        sys.exit(1)
+    except Exception as e:
+        print(json.dumps({"error": f"Agent failed: {str(e)}", "findings": []}))
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
index 137a9b2..31f88f1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
-openai
+e2b>=1.0.0
+anthropic>=0.25.0
 requests
 python-dotenv
 fastapi
diff --git a/review.py b/review.py
index 50d475f..a7a3b93 100644
--- a/review.py
+++ b/review.py
@@ -40,6 +40,43 @@
 # Helper Functions
 # -------------------------
 
+def clean_and_parse_json(text: str):
+    """Clean LLM response and parse JSON, removing markdown fences and extracting the first JSON object."""
+    if not text:
+        raise ValueError("Empty response")
+
+    cleaned = text.strip()
+
+    # Remove markdown code fences (```json ... ``` or ``` ... ```)
+    if cleaned.startswith("```"):
+        lines = cleaned.split('\n')
+        # Skip first line if it's a fence
+        if lines[0].strip().startswith("```"):
+            lines = lines[1:]
+        # Remove last line if it's a closing fence
+        if lines and lines[-1].strip() == "```":
+            lines = lines[:-1]
+        cleaned = '\n'.join(lines).strip()
+
+    # Try direct parse first
+    try:
+        return json.loads(cleaned)
+    except json.JSONDecodeError:
+        pass  # Continue to brace extraction
+
+    # Find the first '{' and last '}' to extract a JSON object
+    start = cleaned.find('{')
+    end = cleaned.rfind('}')
+    if start != -1 and end != -1 and end > start:
+        candidate = cleaned[start:end+1]
+        try:
+            return json.loads(candidate)
+        except json.JSONDecodeError:
+            pass
+
+    # If we get here, parsing failed
+    raise ValueError(f"Could not extract valid JSON from response (first 200 chars): {text[:200]}...")
+
 def should_skip_file(filename):
     """Check if a file should be skipped based on extension or directory."""
     # Check extension
@@ -189,6 +226,7 @@ def process_review(repo: str, pr_number: int, installation_id: int):
   "comments": [
     {{
       "line": <line number>,
+      "severity": "<critical|high|medium|low>",
       "message": "<review comment>"
     }}
   ]
@@ -197,6 +235,8 @@ def process_review(repo: str, pr_number: int, installation_id: int):
 If there are no issues, return:
 
 {{ "comments": [] }}
+
+IMPORTANT: Return ONLY valid JSON. Do not include any other text, explanations, or markdown formatting (such as ```json code fences```).
 """
 
         try:
@@ -210,18 +250,122 @@ def process_review(repo: str, pr_number: int, installation_id: int):
             )
 
             result = response.choices[0].message.content
-            data = json.loads(result)
+            try:
+                data = clean_and_parse_json(result)
+            except Exception as e:
+                print(f"Failed to parse LLM response for {filename}: {e}")
+                continue
 
             for c in data["comments"]:
                 comments.append({
                     "path": filename,
                     "line": c["line"],
+                    "severity": c.get("severity", "medium"),
                     "body": c["message"]
                 })
 
         except Exception as e:
             print(f"LLM review failed for {filename}: {e}")
 
+    # Second pass: Consolidate and prioritize all comments (max 5 total)
+    all_file_comments = comments  # Save all collected comments
+    print(f"\nCollected {len(all_file_comments)} potential issues. Consolidating to max {MAX_COMMENTS_PER_PR}...")
+
+    comments = []
+    if all_file_comments:
+        # Build a summary of all issues for prioritization
+        issues_summary = ""
+        for idx, comment in enumerate(all_file_comments, 1):
+            issues_summary += f"{idx}. [{comment['severity'].upper()}] {comment['path']}:{comment['line']} - {comment['body'][:100]}...\n"
+
+        consolidation_prompt = f"""
+You are a senior software engineer conducting a final PR review.
+
+The following potential issues were identified across all files:
+
+{issues_summary}
+
+Select at most {MAX_COMMENTS_PER_PR} of the MOST CRITICAL issues to actually comment on.
+
+IMPORTANT RULES:
+- Prioritize critical and high severity issues first
+- Skip low-severity issues (style, nitpicks) unless there's something really important
+- If multiple issues are related, consider if they can be combined into a single comment
+- Be conservative - it's better to have fewer, more impactful comments than many minor ones
+- **You MUST include at least one comment (either a selected issue or a summary comment), even if the code looks perfect.** If no significant issues are found, provide a positive or neutral summary comment like "No significant issues found. The changes look good."
+
+Return JSON in this format:
+
+{{
+  "selected_issues": [
+    {{
+      "original_index": <index from list above>,
+      "comment": {{
+        "line": <line number>,
+        "message": "<final review comment - refine if needed for clarity>"
+      }}
+    }}
+  ],
+  "summary_comment": "<if you want to add general feedback instead of or in addition to specific line comments. This can be used to satisfy the 'at least one comment' rule when there are no issues.>"
+}}
+
+Do NOT return empty strings. Ensure either selected_issues contains at least one item OR summary_comment is non-empty.
+
+IMPORTANT: Return ONLY valid JSON. Do not include any other text, explanations, or markdown formatting.
+"""
+
+        try:
+            response = client.chat.completions.create(
+                model="gpt-4.1-mini",
+                messages=[
+                    {"role": "system", "content": "You are a senior software engineer with extreme technical expertise."},
+                    {"role": "user", "content": consolidation_prompt}
+                ],
+                temperature=0
+            )
+
+            result = response.choices[0].message.content
+            try:
+                data = clean_and_parse_json(result)
+            except Exception as e:
+                raise ValueError(f"Consolidation JSON parsing failed: {e}")
+
+            # Add selected specific comments
+            for selected in data["selected_issues"][:MAX_COMMENTS_PER_PR]:
+                idx = selected["original_index"] - 1
+                if 0 <= idx < len(all_file_comments):
+                    original = all_file_comments[idx]
+                    comments.append({
+                        "path": original["path"],
+                        "line": selected["comment"]["line"],
+                        "body": selected["comment"]["message"]
+                    })
+
+            # Optionally add summary comment if present and we have room
+            if data.get("summary_comment") and len(comments) < MAX_COMMENTS_PER_PR:
+                # Summary comments are posted as general PR comments (no line number)
+                comments.append({
+                    "path": None,  # Indicates general PR comment
+                    "line": None,
+                    "body": data["summary_comment"]
+                })
+
+        except Exception as e:
+            print(f"Consolidation failed, falling back to all comments: {e}")
+            # Fallback: just use all comments but respect the limit
+            comments = [
+                {"path": c["path"], "line": c["line"], "body": c["body"]}
+                for c in all_file_comments[:MAX_COMMENTS_PER_PR]
+            ]
+
+    # Ensure at least one comment is posted, even if no issues were found
+    if len(comments) == 0:
+        comments.append({
+            "path": None,
+            "line": None,
+            "body": "Review complete: No significant issues found. The changes look good."
+        })
+
     # -------------------------
     # Get commit SHA
     # -------------------------
diff --git a/server.py b/server.py
index 4d85de4..4f6c9b8 100644
--- a/server.py
+++ b/server.py
@@ -3,12 +3,18 @@
 import hashlib
 import os
 import json
+import threading
+import requests
+from e2b import Sandbox
 from github_app import GitHubAppAuth
 from review import process_review
 
 app = FastAPI(title="PR Review Bot")
 WEBHOOK_SECRET = os.environ.get('GITHUB_WEBHOOK_SECRET')
 
+# Semaphore to limit concurrent E2B sandboxes (max 3)
+e2b_semaphore = threading.Semaphore(3)
+
 def verify_signature(request_body: bytes, signature: str) -> bool:
     """Verify webhook signature from GitHub."""
     if not signature or not WEBHOOK_SECRET:
@@ -22,6 +28,201 @@ def verify_signature(request_body: bytes, signature: str) -> bool:
 
     return hmac.compare_digest(signature, expected)
 
+def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
+    """
+    Run PR review using E2B sandbox and Anthropic Agent SDK.
+    Falls back to process_review if E2B fails.
+    """
+    print(f"[E2B] Starting review for PR #{pr_number} in {repo}")
+
+    # Acquire semaphore to limit concurrent sandboxes
+    acquired = False
+    try:
+        acquired = e2b_semaphore.acquire(blocking=True, timeout=60)
+        if not acquired:
+            print("[E2B] Timeout acquiring semaphore, falling back to local review")
+            process_review(repo, pr_number, installation_id)
+            return
+    except Exception as e:
+        print(f"[E2B] Semaphore error: {e}, falling back to local review")
+        process_review(repo, pr_number, installation_id)
+        return
+
+    sandbox = None
+    try:
+        # Get GitHub App auth token
+        auth = GitHubAppAuth(
+            app_id=os.environ["GITHUB_APP_ID"],
+            private_key_path=os.environ["GITHUB_PRIVATE_KEY_PATH"],
+            installation_id=installation_id
+        )
+        token = auth.get_installation_token()
+
+        # Setup headers with installation token
+        headers = {
+            "Authorization": f"token {token}",
+            "Accept": "application/vnd.github+json"
+        }
+
+        # Fetch PR data to get diff and commit SHA
+        pr_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"
+        try:
+            pr_response = requests.get(pr_url, headers=headers)
+            pr_response.raise_for_status()
+            pr_data = pr_response.json()
+            commit_sha = pr_data["head"]["sha"]
+            branch_name = pr_data["head"]["ref"]
+        except Exception as e:
+            print(f"[E2B] Failed to fetch PR data: {e}, falling back to local review")
+            process_review(repo, pr_number, installation_id)
+            return
+
+        # Get PR diff (unified diff format)
+        diff_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/files"
+        try:
+            diff_response = requests.get(diff_url, headers=headers)
+            diff_response.raise_for_status()
+            files_data = diff_response.json()
+
+            # Build unified diff from files data
+            diff_parts = []
+            for file in files_data:
+                if 'patch' in file:
+                    diff_parts.append(f"--- a/{file['filename']}")
+                    diff_parts.append(f"+++ b/{file['filename']}")
+                    diff_parts.append(file['patch'])
+            pr_diff = '\n'.join(diff_parts)
+        except Exception as e:
+            print(f"[E2B] Failed to fetch PR diff: {e}, falling back to local review")
+            process_review(repo, pr_number, installation_id)
+            return
+
+        # Create E2B sandbox
+        print("[E2B] Creating sandbox...")
+        e2b_api_key = os.environ.get("E2B_API_KEY")
+        if not e2b_api_key:
+            raise ValueError("E2B_API_KEY not set")
+
+        sandbox = Sandbox(api_key=e2b_api_key, template="claude", timeout=600)#id is wunszvjeuyrdgrt0z6o9
+        print("[E2B] Sandbox created")
+
+        # Upload agent.py to sandbox
+        with open('/app/agent.py', 'r') as f:
+            agent_code = f.read()
+        sandbox.files.write('/app/agent.py', agent_code)
+        print("[E2B] agent.py uploaded")
+
+        # Write PR diff to a file to avoid env var size limits for large PRs
+        sandbox.files.write('/app/pr.diff', pr_diff)
+        print("[E2B] PR diff written to /app/pr.diff")
+
+        # Run the agent with env vars passed directly to the command
+        print("[E2B] Starting agent process...")
+        agent_envs = {
+            'REPO': repo,
+            'COMMIT_SHA': commit_sha,
+            'BRANCH': branch_name,
+            'GITHUB_TOKEN': token,
+            'ANTHROPIC_API_KEY': os.environ.get("ANTHROPIC_API_KEY", ""),
+            'MAX_TOOL_CALLS': os.environ.get("MAX_TOOL_CALLS", "10"),
+        }
+
+        stdout_chunks = []
+        sandbox.commands.run(
+            "python /app/agent.py",
+            envs=agent_envs,
+            timeout=580,
+            on_stdout=lambda data: stdout_chunks.append(data),
+            on_stderr=lambda data: print(f"[Agent] {data}", end='', flush=True)
+        )
+        stdout = ''.join(stdout_chunks)
+
+        print("[E2B] Agent process completed")
+
+        # Parse the JSON output
+        try:
+            output = json.loads(stdout)
+            findings = output.get('findings', [])
+
+            # Cap at 3 findings
+            findings = findings[:3]
+            print(f"[E2B] Review completed: {len(findings)} findings")
+
+            # Build set of files in this PR for validation of inline comments
+            pr_files = {f['filename'] for f in files_data}
+
+            for finding in findings:
+                file_path = finding.get('file')
+                line = finding.get('line')
+                severity = finding.get('severity', 'medium')
+                message = finding.get('message', '')
+
+                if not message:
+                    continue
+
+                comment_body = f"**[{severity.upper()}]** {message}"
+
+                if file_path and line:
+                    # Inline comment — validate file is in the PR diff
+                    if file_path not in pr_files:
+                        print(f"[E2B] Skipping inline finding: {file_path} not in PR diff")
+                        continue
+                    comment_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments"
+                    payload = {
+                        "body": comment_body,
+                        "commit_id": commit_sha,
+                        "path": file_path,
+                        "line": line
+                    }
+                    label = f"{file_path}:{line}"
+                else:
+                    # Overall assessment — post as a general PR comment
+                    comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
+                    payload = {"body": comment_body}
+                    label = "overall"
+
+                try:
+                    response = requests.post(comment_url, headers=headers, json=payload)
+                    response.raise_for_status()
+                    print(f"[E2B] Posted comment ({label})")
+                except Exception as e:
+                    print(f"[E2B] Failed to post comment ({label}): {e}")
+
+            if not findings:
+                comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
+                payload = {"body": "Review complete: No significant issues found. The changes look good."}
+                try:
+                    response = requests.post(comment_url, headers=headers, json=payload)
+                    response.raise_for_status()
+                    print("[E2B] Posted summary comment")
+                except Exception as e:
+                    print(f"[E2B] Failed to post summary comment: {e}")
+
+        except json.JSONDecodeError as e:
+            print(f"[E2B] Failed to parse agent output: {e}")
+            print(f"[E2B] Agent output (first 500 chars): {stdout[:500]}")
+            print("[E2B] Falling back to local review")
+            process_review(repo, pr_number, installation_id)
+
+    except Exception as e:
+        print(f"[E2B] Error during review: {e}")
+        print("[E2B] Falling back to local review")
+        process_review(repo, pr_number, installation_id)
+
+    finally:
+        # Ensure sandbox is closed
+        if sandbox:
+            try:
+                sandbox.close()
+                print("[E2B] Sandbox closed")
+            except Exception as e:
+                print(f"[E2B] Error closing sandbox: {e}")
+
+        # Release semaphore
+        if acquired:
+            e2b_semaphore.release()
+        print("[E2B] Review task completed")
+
 @app.post('/webhook')
 async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
     # Verify signature
@@ -40,9 +241,9 @@ async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
         repo = event['pull_request']['base']['repo']['full_name']
         installation_id = event['installation']['id']
 
-        # Run review in background (don't block webhook response)
+        # Run review in E2B sandbox in background (don't block webhook response)
         background_tasks.add_task(
-            process_review,
+            run_review_in_e2b,
             repo=repo,
             pr_number=pr_number,
             installation_id=installation_id

From 96db2f23b666f822c920e0b71b73c8a92943120b Mon Sep 17 00:00:00 2001
From: vineeshah <vineeshah2006@gmail.com>
Date: Wed, 8 Apr 2026 13:57:04 -0700
Subject: [PATCH 2/5] fix: restore openai dep, drop explicit Sandbox api_key, add 422 comment fallback

---
 requirements.txt |  1 +
 server.py        | 20 +++++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 31f88f1..2cf69ef 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,6 @@
 e2b>=1.0.0
 anthropic>=0.25.0
+openai
 requests
 python-dotenv
 fastapi
diff --git a/server.py b/server.py
index 4f6c9b8..7742e63 100644
--- a/server.py
+++ b/server.py
@@ -99,11 +99,10 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
 
         # Create E2B sandbox
         print("[E2B] Creating sandbox...")
-        e2b_api_key = os.environ.get("E2B_API_KEY")
-        if not e2b_api_key:
+        if not os.environ.get("E2B_API_KEY"):
             raise ValueError("E2B_API_KEY not set")
 
-        sandbox = Sandbox(api_key=e2b_api_key, template="claude", timeout=600)#id is wunszvjeuyrdgrt0z6o9
+        sandbox = Sandbox(template="claude", timeout=600)
         print("[E2B] Sandbox created")
 
         # Upload agent.py to sandbox
@@ -185,8 +184,19 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
                     response = requests.post(comment_url, headers=headers, json=payload)
                     response.raise_for_status()
                     print(f"[E2B] Posted comment ({label})")
-                except Exception as e:
-                    print(f"[E2B] Failed to post comment ({label}): {e}")
+                except requests.HTTPError as e:
+                    if e.response.status_code == 422 and file_path and line:
+                        # Line not in diff — fall back to general issue comment
+                        print(f"[E2B] Inline comment rejected (line not in diff), posting as issue comment ({label})")
+                        fallback_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
+                        fallback_payload = {"body": f"**[{severity.upper()}]** `{file_path}` (line {line}): {message}"}
+                        try:
+                            requests.post(fallback_url, headers=headers, json=fallback_payload).raise_for_status()
+                            print(f"[E2B] Posted fallback comment ({label})")
+                        except Exception as e2:
+                            print(f"[E2B] Failed to post fallback comment: {e2}")
+                    else:
+                        print(f"[E2B] Failed to post comment ({label}): {e}")
 
             if not findings:
                 comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"

From eccf29b0d5aca4047dc2dcd7096942445a8d0401 Mon Sep 17 00:00:00 2001
From: vineeshah <vineeshah2006@gmail.com>
Date: Wed, 8 Apr 2026 17:00:45 -0700
Subject: [PATCH 3/5] fixes suggested by the bot

---
 agent.py  | 13 +++++++------
 server.py | 49 +++++++++++++++++++++----------------------------
 2 files changed, 28 insertions(+), 34 deletions(-)

diff --git a/agent.py b/agent.py
index e5bbdcc..12cfd8e 100644
--- a/agent.py
+++ b/agent.py
@@ -84,7 +84,7 @@ def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str:
 
     while True:
         response = client.messages.create(
-            model="claude-haiku-4-5",
+            model="claude-haiku-4-5-20251001",
             max_tokens=4096,
             system=SYSTEM_PROMPT,
             tools=TOOLS,
@@ -94,8 +94,9 @@ def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str:
         messages.append({"role": "assistant", "content": response.content})
 
         if response.stop_reason == "end_turn":
+            print(f"end_turn content: {response.content}", file=sys.stderr)
             for block in response.content:
-                if hasattr(block, 'text'):
+                if hasattr(block, 'text') and block.text.strip():
                     return block.text
             return ""
 
@@ -116,14 +117,16 @@ def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str:
 
             if tool_call_count >= max_tool_calls:
                 print(f"Tool call limit ({max_tool_calls}) reached, requesting final answer.", file=sys.stderr)
+                messages.append({"role": "user", "content": "You've used the maximum number of tool calls. Based on everything you've seen, provide your final review as JSON now."})
                 response = client.messages.create(
-                    model="claude-haiku-4-5",
+                    model="claude-haiku-4-5-20251001",
                     max_tokens=4096,
                     system=SYSTEM_PROMPT,
                     messages=messages
                 )
+                print(f"Final response stop_reason={response.stop_reason} content={response.content}", file=sys.stderr)
                 for block in response.content:
-                    if hasattr(block, 'text'):
+                    if hasattr(block, 'text') and block.text.strip():
                         return block.text
                 return ""
         else:
@@ -199,10 +202,8 @@ def main():
 
     except json.JSONDecodeError as e:
         print(json.dumps({"error": f"Invalid JSON from agent: {str(e)}", "findings": []}))
-        sys.exit(1)
     except Exception as e:
         print(json.dumps({"error": f"Agent failed: {str(e)}", "findings": []}))
-        sys.exit(1)
 
 
 if __name__ == '__main__':
diff --git a/server.py b/server.py
index 7742e63..17ef17a 100644
--- a/server.py
+++ b/server.py
@@ -7,7 +7,6 @@
 import requests
 from e2b import Sandbox
 from github_app import GitHubAppAuth
-from review import process_review
 
 app = FastAPI(title="PR Review Bot")
 WEBHOOK_SECRET = os.environ.get('GITHUB_WEBHOOK_SECRET')
@@ -31,7 +30,7 @@ def verify_signature(request_body: bytes, signature: str) -> bool:
 def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
     """
     Run PR review using E2B sandbox and Anthropic Agent SDK.
-    Falls back to process_review if E2B fails.
+    Runs PR review using E2B sandbox and Anthropic agent.
     """
     print(f"[E2B] Starting review for PR #{pr_number} in {repo}")
 
@@ -40,15 +39,14 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
     try:
         acquired = e2b_semaphore.acquire(blocking=True, timeout=60)
         if not acquired:
-            print("[E2B] Timeout acquiring semaphore, falling back to local review")
-            process_review(repo, pr_number, installation_id)
+            print("[E2B] Timeout acquiring semaphore, skipping review")
             return
     except Exception as e:
-        print(f"[E2B] Semaphore error: {e}, falling back to local review")
-        process_review(repo, pr_number, installation_id)
+        print(f"[E2B] Semaphore error: {e}")
         return
 
     sandbox = None
+    stdout_chunks = []
     try:
         # Get GitHub App auth token
         auth = GitHubAppAuth(
@@ -73,8 +71,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
             commit_sha = pr_data["head"]["sha"]
             branch_name = pr_data["head"]["ref"]
         except Exception as e:
-            print(f"[E2B] Failed to fetch PR data: {e}, falling back to local review")
-            process_review(repo, pr_number, installation_id)
+            print(f"[E2B] Failed to fetch PR data: {e}")
             return
 
         # Get PR diff (unified diff format)
@@ -93,8 +90,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
                     diff_parts.append(file['patch'])
             pr_diff = '\n'.join(diff_parts)
         except Exception as e:
-            print(f"[E2B] Failed to fetch PR diff: {e}, falling back to local review")
-            process_review(repo, pr_number, installation_id)
+            print(f"[E2B] Failed to fetch PR diff: {e}")
             return
 
         # Create E2B sandbox
@@ -102,7 +98,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
         if not os.environ.get("E2B_API_KEY"):
             raise ValueError("E2B_API_KEY not set")
 
-        sandbox = Sandbox(template="claude", timeout=600)
+        sandbox = Sandbox.create(template="claude", timeout=300)
         print("[E2B] Sandbox created")
 
         # Upload agent.py to sandbox
@@ -115,6 +111,11 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
         sandbox.files.write('/app/pr.diff', pr_diff)
         print("[E2B] PR diff written to /app/pr.diff")
 
+        # Install dependencies in the sandbox
+        print("[E2B] Installing dependencies...")
+        sandbox.commands.run("pip3 install anthropic -q --break-system-packages", timeout=120)
+        print("[E2B] Dependencies installed")
+
         # Run the agent with env vars passed directly to the command
         print("[E2B] Starting agent process...")
         agent_envs = {
@@ -128,9 +129,9 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
 
         stdout_chunks = []
         sandbox.commands.run(
-            "python /app/agent.py",
+            "python3 /app/agent.py",
             envs=agent_envs,
-            timeout=580,
+            timeout=290,
             on_stdout=lambda data: stdout_chunks.append(data),
             on_stderr=lambda data: print(f"[Agent] {data}", end='', flush=True)
         )
@@ -139,6 +140,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
         print("[E2B] Agent process completed")
 
         # Parse the JSON output
+        print(f"[E2B] Agent stdout: {stdout[:500]}")
         try:
             output = json.loads(stdout)
             findings = output.get('findings', [])
@@ -199,34 +201,25 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
                         print(f"[E2B] Failed to post comment ({label}): {e}")
 
             if not findings:
-                comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
-                payload = {"body": "Review complete: No significant issues found. The changes look good."}
-                try:
-                    response = requests.post(comment_url, headers=headers, json=payload)
-                    response.raise_for_status()
-                    print("[E2B] Posted summary comment")
-                except Exception as e:
-                    print(f"[E2B] Failed to post summary comment: {e}")
+                print("[E2B] No findings, skipping comment")
 
         except json.JSONDecodeError as e:
             print(f"[E2B] Failed to parse agent output: {e}")
             print(f"[E2B] Agent output (first 500 chars): {stdout[:500]}")
-            print("[E2B] Falling back to local review")
-            process_review(repo, pr_number, installation_id)
 
     except Exception as e:
         print(f"[E2B] Error during review: {e}")
-        print("[E2B] Falling back to local review")
-        process_review(repo, pr_number, installation_id)
+        if stdout_chunks:
+            print(f"[E2B] Agent stdout: {''.join(stdout_chunks)}")
 
     finally:
         # Ensure sandbox is closed
         if sandbox:
             try:
-                sandbox.close()
-                print("[E2B] Sandbox closed")
+                sandbox.kill()
+                print("[E2B] Sandbox killed")
             except Exception as e:
-                print(f"[E2B] Error closing sandbox: {e}")
+                print(f"[E2B] Error killing sandbox: {e}")
 
         # Release semaphore
         if acquired:

From 2ee04eab6eef5335f41d3c87f5b78c7e1e5fbcb4 Mon Sep 17 00:00:00 2001
From: vineeshah <vineeshah2006@gmail.com>
Date: Wed, 8 Apr 2026 17:38:27 -0700
Subject: [PATCH 4/5] feat : port sandbox agent to Vercel AI SDK (agent.mjs)

---
 agent.mjs | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 server.py |  23 ++++----
 2 files changed, 170 insertions(+), 9 deletions(-)
 create mode 100644 agent.mjs

diff --git a/agent.mjs b/agent.mjs
new file mode 100644
index 0000000..2c746ef
--- /dev/null
+++ b/agent.mjs
@@ -0,0 +1,156 @@
+import { generateText, tool } from 'ai';
+import { createAnthropic } from '@ai-sdk/anthropic';
+import { createOpenAI } from '@ai-sdk/openai';
+import { createGoogleGenerativeAI } from '@ai-sdk/google';
+import { z } from 'zod';
+import { spawnSync } from 'child_process';
+import { readFileSync } from 'fs';
+
+function runBash(command, timeout = 30000) {
+  const safeEnv = {
+    PATH: process.env.PATH || '/usr/bin:/bin:/usr/local/bin',
+    HOME: process.env.HOME || '/root',
+    LANG: process.env.LANG || 'en_US.UTF-8',
+  };
+
+  const result = spawnSync('sh', ['-c', command], {
+    cwd: '/tmp/repo',
+    env: safeEnv,
+    timeout,
+    encoding: 'utf8',
+  });
+
+  if (result.error?.code === 'ETIMEDOUT') return `ERROR: Command timed out after ${timeout / 1000}s`;
+  if (result.error) return `ERROR: ${result.error.message}`;
+
+  let output = result.stdout || '';
+  if (result.stderr) output += `\n[stderr]: ${result.stderr}`;
+  if (!output.trim()) return `[exit code ${result.status}, no output]`;
+  if (output.length > 8000) output = output.slice(0, 8000) + `\n...[truncated, ${output.length} total chars]`;
+  return output;
+}
+
+const SYSTEM_PROMPT = `You are a senior software engineer reviewing a pull request.
+
+The repository is already cloned and your working directory is the repo root. You have a bash tool with full shell access — use it however you see fit to understand the changes and their impact.
+
+Return at most 3 findings as JSON — no other text. Each finding can be an inline comment (specific file + line) or an overall assessment (no file/line).
+
+{
+  "findings": [
+    {
+      "file": "path/to/file.py",
+      "line": 123,
+      "severity": "critical" | "high" | "medium" | "low",
+      "message": "..."
+    },
+    {
+      "severity": "medium",
+      "message": "Overall: ..."
+    }
+  ]
+}
+
+If there are no significant issues, return {"findings": []}.`;
+
+function getModel(modelId, anthropicApiKey, openaiApiKey, googleApiKey) {
+  if (modelId.startsWith('gpt-') || modelId.startsWith('o1') || modelId.startsWith('o3')) {
+    return createOpenAI({ apiKey: openaiApiKey })(modelId);
+  }
+  if (modelId.startsWith('gemini-')) {
+    return createGoogleGenerativeAI({ apiKey: googleApiKey })(modelId);
+  }
+  return createAnthropic({ apiKey: anthropicApiKey })(modelId);
+}
+
+async function main() {
+  const repoName = process.env.REPO;
+  const branchName = process.env.BRANCH || 'main';
+  const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
+  const openaiApiKey = process.env.OPENAI_API_KEY || '';
+  const googleApiKey = process.env.GOOGLE_API_KEY || '';
+  const githubToken = process.env.GITHUB_TOKEN;
+  const maxSteps = parseInt(process.env.MAX_TOOL_CALLS || '10');
+  const modelId = process.env.MODEL || 'claude-haiku-4-5-20251001';
+
+  if (!repoName || !anthropicApiKey || !githubToken) {
+    console.log(JSON.stringify({ error: 'Missing required environment variables', findings: [] }));
+    process.exit(0);
+  }
+
+  // Clone the repository
+  process.stderr.write('Cloning repository...\n');
+  const cloneDir = '/tmp/repo';
+  const cloneUrl = `https://${githubToken}@github.com/${repoName}.git`;
+
+  spawnSync('rm', ['-rf', cloneDir]);
+  const cloneResult = spawnSync(
+    'git',
+    ['clone', '--depth=1', '--branch', branchName, cloneUrl, cloneDir],
+    { encoding: 'utf8', env: { ...process.env, GIT_TERMINAL_PROMPT: '0' } }
+  );
+
+  if (cloneResult.status !== 0) {
+    console.log(JSON.stringify({ error: `Failed to clone: ${cloneResult.stderr || 'unknown error'}`, findings: [] }));
+    process.exit(0);
+  }
+  process.stderr.write(`Cloned ${repoName} branch ${branchName} (shallow)\n`);
+
+  // Strip token from git config to prevent exfiltration via bash tool
+  spawnSync('git', ['remote', 'set-url', 'origin', `https://github.com/${repoName}.git`], {
+    cwd: cloneDir, encoding: 'utf8',
+  });
+
+  // Read PR diff
+  let prDiff = '';
+  try {
+    prDiff = readFileSync('/app/pr.diff', 'utf8');
+  } catch (e) {
+    process.stderr.write(`Warning: Could not read PR diff (${e.message})\n`);
+  }
+
+  process.stderr.write('Running agent...\n');
+
+  try {
+    const model = getModel(modelId, anthropicApiKey, openaiApiKey, googleApiKey);
+    let stepCount = 0;
+
+    const { text } = await generateText({
+      model,
+      system: SYSTEM_PROMPT,
+      prompt: `Please review this pull request:\n\n${prDiff}`,
+      maxSteps,
+      tools: {
+        bash: tool({
+          description: 'Run a shell command in the repository root.',
+          parameters: z.object({ command: z.string() }),
+          execute: async ({ command }) => {
+            stepCount++;
+            process.stderr.write(`Tool call ${stepCount}/${maxSteps}: bash(${JSON.stringify({ command })})\n`);
+            return runBash(command);
+          },
+        }),
+      },
+    });
+
+    // Strip markdown fences if present
+    let cleaned = text.trim();
+    if (cleaned.startsWith('```')) {
+      const lines = cleaned.split('\n').slice(1);
+      if (lines.at(-1)?.trim() === '```') lines.pop();
+      cleaned = lines.join('\n').trim();
+    }
+
+    const output = JSON.parse(cleaned);
+    if (!Array.isArray(output.findings)) output.findings = [];
+    output.findings = output.findings.slice(0, 3);
+
+    console.log(JSON.stringify(output));
+    process.exit(0);
+  } catch (e) {
+    console.log(JSON.stringify({ error: `Agent failed: ${e.message}`, findings: [] }));
+    process.exit(0);
+  }
+}
+
+main();
diff --git a/server.py b/server.py
index 17ef17a..5e19675 100644
--- a/server.py
+++ b/server.py
@@ -101,20 +101,22 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
         sandbox = Sandbox.create(template="claude", timeout=300)
         print("[E2B] Sandbox created")
 
-        # Upload agent.py to sandbox
-        with open('/app/agent.py', 'r') as f:
+        # Upload agent.mjs to sandbox
+        with open('/app/agent.mjs', 'r') as f:
             agent_code = f.read()
-        sandbox.files.write('/app/agent.py', agent_code)
-        print("[E2B] agent.py uploaded")
+        sandbox.files.write('/app/agent.mjs', agent_code)
+        print("[E2B] agent.mjs uploaded")
 
         # Write PR diff to a file to avoid env var size limits for large PRs
         sandbox.files.write('/app/pr.diff', pr_diff)
         print("[E2B] PR diff written to /app/pr.diff")
 
-        # Install dependencies in the sandbox
-        print("[E2B] Installing dependencies...")
-        sandbox.commands.run("pip3 install anthropic -q --break-system-packages", timeout=120)
-        print("[E2B] Dependencies installed")
+        # Write package.json and install Node dependencies
+        print("[E2B] Installing Node.js dependencies...")
+        package_json = '{"type":"module","dependencies":{"ai":"^4.0.0","@ai-sdk/anthropic":"^1.0.0","@ai-sdk/openai":"^1.0.0","@ai-sdk/google":"^1.0.0","zod":"^3.0.0"}}'
+        sandbox.files.write('/app/package.json', package_json)
+        sandbox.commands.run("cd /app && npm install -q", timeout=120)
+        print("[E2B] Node.js dependencies installed")
 
         # Run the agent with env vars passed directly to the command
         print("[E2B] Starting agent process...")
@@ -124,12 +126,15 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
             'BRANCH': branch_name,
             'GITHUB_TOKEN': token,
             'ANTHROPIC_API_KEY': os.environ.get("ANTHROPIC_API_KEY", ""),
+            'OPENAI_API_KEY': os.environ.get("OPENAI_API_KEY", ""),
+            'GOOGLE_API_KEY': os.environ.get("GOOGLE_API_KEY", ""),
             'MAX_TOOL_CALLS': os.environ.get("MAX_TOOL_CALLS", "10"),
+            'MODEL': os.environ.get("MODEL", "claude-haiku-4-5-20251001"),
         }
 
         stdout_chunks = []
         sandbox.commands.run(
-            "python3 /app/agent.py",
+            "node /app/agent.mjs",
             envs=agent_envs,
             timeout=290,
             on_stdout=lambda data: stdout_chunks.append(data),

From 2a7df0335741e64a63ad2a1a1d12dad3e108a668 Mon Sep 17 00:00:00 2001
From: vineeshah <vineeshah2006@gmail.com>
Date: Sat, 11 Apr 2026 09:47:09 -0700
Subject: [PATCH 5/5] fix : sandbox debugging (AI SDK v6 API, clone PR head repo, threaded webhook dispatch)

---
 .dockerignore |  2 ++
 Dockerfile    |  3 +++
 agent.mjs     | 51 ++++++++++++++++++++++++++++-----------------------
 server.py     | 37 ++++++++++++++++++++++---------------
 4 files changed, 55 insertions(+), 38 deletions(-)
 create mode 100644 .dockerignore

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..683d6c2
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+private-key.pem
+*.pem
diff --git a/Dockerfile b/Dockerfile
index f1e9e0f..abe3544 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,9 @@
 # Use Python 3.11 slim image
 FROM python:3.11-slim
 
+# Disable Python output buffering so logs appear immediately
+ENV PYTHONUNBUFFERED=1
+
 # Set working directory
 WORKDIR /app
 
diff --git a/agent.mjs b/agent.mjs
index 2c746ef..1c5f1bc 100644
--- a/agent.mjs
+++ b/agent.mjs
@@ -1,7 +1,7 @@
-import { generateText, tool } from 'ai';
-import { createAnthropic } from '@ai-sdk/anthropic';
-import { createOpenAI } from '@ai-sdk/openai';
-import { createGoogleGenerativeAI } from '@ai-sdk/google';
+import { generateText, tool, stepCountIs } from 'ai';
+import { anthropic } from '@ai-sdk/anthropic';
+import { openai } from '@ai-sdk/openai';
+import { google } from '@ai-sdk/google';
 import { z } from 'zod';
 import { spawnSync } from 'child_process';
 import { readFileSync } from 'fs';
@@ -30,11 +30,20 @@ function runBash(command, timeout = 30000) {
   return output;
 }
 
-const SYSTEM_PROMPT = `You are a senior software engineer reviewing a pull request.
+const SYSTEM_PROMPT = `You are a senior software engineer doing a thorough review of a pull request.
 
-The repository is already cloned and your working directory is the repo root. You have a bash tool with full shell access — use it however you see fit to understand the changes and their impact.
+The repository is already cloned at the repo root. You have full bash access — use it liberally, there is no cost to running many commands.
 
-Return at most 3 findings as JSON — no other text. Each finding can be an inline comment (specific file + line) or an overall assessment (no file/line).
+You MUST do all of the following before forming any conclusions:
+1. Read each changed file in full, not just the diff
+2. Find every caller and usage of any modified function, class, or symbol across the entire repo
+3. Read related files — tests, configs, dependent modules, anything that could be affected
+4. Check for edge cases: error handling, concurrency, security, null/undefined, type mismatches
+5. Run any additional commands needed to fully understand the impact
+
+Use as many bash calls as you need. Do not cut corners.
+
+Only after thorough exploration, return at most 3 findings as JSON — no other text. Focus on real bugs, security issues, or broken logic. Skip style nits.
 
 {
   "findings": [
@@ -53,27 +62,24 @@ Return at most 3 findings as JSON — no other text. Each finding can be an inli
 
 If there are no significant issues, return {"findings": []}.`;
 
-function getModel(modelId, anthropicApiKey, openaiApiKey, googleApiKey) {
-  if (modelId.startsWith('gpt-') || modelId.startsWith('o1') || modelId.startsWith('o3')) {
-    return createOpenAI({ apiKey: openaiApiKey })(modelId);
+function getModel(modelId) {
+  if (modelId.startsWith('gpt-') || modelId.startsWith('o1') || modelId.startsWith('o3') || modelId.startsWith('o4')) {
+    return openai(modelId);
   }
   if (modelId.startsWith('gemini-')) {
-    return createGoogleGenerativeAI({ apiKey: googleApiKey })(modelId);
+    return google(modelId);
   }
-  return createAnthropic({ apiKey: anthropicApiKey })(modelId);
+  return anthropic(modelId);
 }
 
 async function main() {
   const repoName = process.env.REPO;
   const branchName = process.env.BRANCH || 'main';
-  const anthropicApiKey = process.env.ANTHROPIC_API_KEY;
-  const openaiApiKey = process.env.OPENAI_API_KEY || '';
-  const googleApiKey = process.env.GOOGLE_API_KEY || '';
   const githubToken = process.env.GITHUB_TOKEN;
   const maxSteps = parseInt(process.env.MAX_TOOL_CALLS || '10');
-  const modelId = process.env.MODEL || 'claude-haiku-4-5-20251001';
+  const modelId = process.env.MODEL || 'gemini-2.5-flash';
 
-  if (!repoName || !anthropicApiKey || !githubToken) {
+  if (!repoName || !githubToken) {
     console.log(JSON.stringify({ error: 'Missing required environment variables', findings: [] }));
     process.exit(0);
   }
@@ -109,24 +115,23 @@ async function main() {
     process.stderr.write(`Warning: Could not read PR diff (${e.message})\n`);
   }
 
-  process.stderr.write('Running agent...\n');
+  process.stderr.write(`Running agent with ${modelId}...\n`);
 
   try {
-    const model = getModel(modelId, anthropicApiKey, openaiApiKey, googleApiKey);
     let stepCount = 0;
 
     const { text } = await generateText({
-      model,
+      model: getModel(modelId),
       system: SYSTEM_PROMPT,
       prompt: `Please review this pull request:\n\n${prDiff}`,
-      maxSteps,
+      stopWhen: stepCountIs(maxSteps),
       tools: {
         bash: tool({
           description: 'Run a shell command in the repository root.',
-          parameters: z.object({ command: z.string() }),
+          inputSchema: z.object({ command: z.string() }),
           execute: async ({ command }) => {
             stepCount++;
-            process.stderr.write(`Tool call ${stepCount}/${maxSteps}: bash(${JSON.stringify({ command })})\n`);
+            process.stderr.write(`Tool call ${stepCount}: bash(${JSON.stringify({ command })})\n`);
             return runBash(command);
           },
         }),
diff --git a/server.py b/server.py
index 5e19675..698cf17 100644
--- a/server.py
+++ b/server.py
@@ -1,4 +1,4 @@
-from fastapi import FastAPI, Request, HTTPException, BackgroundTasks
+from fastapi import FastAPI, Request, HTTPException
 import hmac
 import hashlib
 import os
@@ -70,6 +70,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
             pr_data = pr_response.json()
             commit_sha = pr_data["head"]["sha"]
             branch_name = pr_data["head"]["ref"]
+            head_repo = pr_data["head"]["repo"]["full_name"]
         except Exception as e:
             print(f"[E2B] Failed to fetch PR data: {e}")
             return
@@ -113,7 +114,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
 
         # Write package.json and install Node dependencies
         print("[E2B] Installing Node.js dependencies...")
-        package_json = '{"type":"module","dependencies":{"ai":"^4.0.0","@ai-sdk/anthropic":"^1.0.0","@ai-sdk/openai":"^1.0.0","@ai-sdk/google":"^1.0.0","zod":"^3.0.0"}}'
+        package_json = '{"type":"module","dependencies":{"ai":"^6.0.0","@ai-sdk/anthropic":"^3.0.0","@ai-sdk/openai":"^3.0.0","@ai-sdk/google":"^3.0.0","zod":"^3.23.0"}}'
         sandbox.files.write('/app/package.json', package_json)
         sandbox.commands.run("cd /app && npm install -q", timeout=120)
         print("[E2B] Node.js dependencies installed")
@@ -121,13 +122,13 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
         # Run the agent with env vars passed directly to the command
         print("[E2B] Starting agent process...")
         agent_envs = {
-            'REPO': repo,
+            'REPO': head_repo,
             'COMMIT_SHA': commit_sha,
             'BRANCH': branch_name,
             'GITHUB_TOKEN': token,
             'ANTHROPIC_API_KEY': os.environ.get("ANTHROPIC_API_KEY", ""),
             'OPENAI_API_KEY': os.environ.get("OPENAI_API_KEY", ""),
-            'GOOGLE_API_KEY': os.environ.get("GOOGLE_API_KEY", ""),
+            'GOOGLE_GENERATIVE_AI_API_KEY': os.environ.get("GOOGLE_GENERATIVE_AI_API_KEY", ""),
             'MAX_TOOL_CALLS': os.environ.get("MAX_TOOL_CALLS", "10"),
             'MODEL': os.environ.get("MODEL", "claude-haiku-4-5-20251001"),
         }
@@ -232,7 +233,7 @@ def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
         print("[E2B] Review task completed")
 
 @app.post('/webhook')
-async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
+async def webhook_handler(request: Request):
     # Verify signature
     signature = request.headers.get('X-Hub-Signature-256')
     body = await request.body()
@@ -242,20 +243,26 @@ async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
 
     event = json.loads(body)
     action = event.get('action')
+    event_type = request.headers.get('X-GitHub-Event', 'unknown')
+    print(f"[Webhook] event={event_type} action={action}")
 
     # Only process PR open/update events
     if action in ['opened', 'reopened', 'synchronize']:
-        pr_number = event['pull_request']['number']
-        repo = event['pull_request']['base']['repo']['full_name']
-        installation_id = event['installation']['id']
-
-        # Run review in E2B sandbox in background (don't block webhook response)
-        background_tasks.add_task(
-            run_review_in_e2b,
-            repo=repo,
-            pr_number=pr_number,
-            installation_id=installation_id
+        try:
+            pr_number = event['pull_request']['number']
+            repo = event['pull_request']['base']['repo']['full_name']
+            installation_id = event['installation']['id']
+        except KeyError as e:
+            print(f"[Webhook] Missing key in payload: {e} — keys: {list(event.keys())}")
+            return {'status': 'ok'}
+
+        print(f"[Webhook] Scheduling review for PR #{pr_number} in {repo} (installation={installation_id})", flush=True)
+        thread = threading.Thread(
+            target=run_review_in_e2b,
+            kwargs={'repo': repo, 'pr_number': pr_number, 'installation_id': installation_id},
+            daemon=True
         )
+        thread.start()
 
     # Respond quickly (GitHub expects <30 sec)
     return {'status': 'ok'}