diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..683d6c2
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,2 @@
+private-key.pem
+*.pem
diff --git a/Dockerfile b/Dockerfile
index f1e9e0f..abe3544 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,6 +1,9 @@
 # Use Python 3.11 slim image
 FROM python:3.11-slim
 
+# Disable Python output buffering so logs appear immediately
+ENV PYTHONUNBUFFERED=1
+
 # Set working directory
 WORKDIR /app
 
diff --git a/agent.mjs b/agent.mjs
new file mode 100644
index 0000000..1c5f1bc
--- /dev/null
+++ b/agent.mjs
@@ -0,0 +1,161 @@
+import { generateText, tool, stepCountIs } from 'ai';
+import { anthropic } from '@ai-sdk/anthropic';
+import { openai } from '@ai-sdk/openai';
+import { google } from '@ai-sdk/google';
+import { z } from 'zod';
+import { spawnSync } from 'child_process';
+import { readFileSync } from 'fs';
+
+function runBash(command, timeout = 30000) {
+  // Minimal env: only PATH, HOME and LANG are passed, so API keys and the GitHub token are not in the shell's env.
+  const safeEnv = {
+    PATH: process.env.PATH || '/usr/bin:/bin:/usr/local/bin',
+    HOME: process.env.HOME || '/root',
+    LANG: process.env.LANG || 'en_US.UTF-8',
+  };
+  const result = spawnSync('sh', ['-c', command], {
+    cwd: '/tmp/repo',
+    env: safeEnv,
+    timeout,
+    encoding: 'utf8',
+  });
+
+  if (result.error?.code === 'ETIMEDOUT') return `ERROR: Command timed out after ${timeout / 1000}s`;
+  // ENOBUFS means the output outgrew spawnSync's maxBuffer; keep the partial output and truncate it below.
+  if (result.error && result.error.code !== 'ENOBUFS') return `ERROR: ${result.error.message}`;
+  let output = result.stdout || '';
+  if (result.stderr) output += `\n[stderr]: ${result.stderr}`;
+  if (!output.trim()) return `[exit code ${result.status}, no output]`;
+  if (output.length > 8000) output = output.slice(0, 8000) + `\n...[truncated, ${output.length} total chars]`;
+  return output;
+}
+
+const SYSTEM_PROMPT = `You are a senior software engineer doing a thorough review of a pull request.
+
+The repository is already cloned at the repo root. You have full bash access — use it liberally, there is no cost to running many commands.
+
+You MUST do all of the following before forming any conclusions:
+1. Read each changed file in full, not just the diff
+2. Find every caller and usage of any modified function, class, or symbol across the entire repo
+3. Read related files — tests, configs, dependent modules, anything that could be affected
+4. Check for edge cases: error handling, concurrency, security, null/undefined, type mismatches
+5. Run any additional commands needed to fully understand the impact
+
+Use as many bash calls as you need. Do not cut corners.
+
+Only after thorough exploration, return at most 3 findings as JSON — no other text. Focus on real bugs, security issues, or broken logic. Skip style nits.
+
+{
+  "findings": [
+    {
+      "file": "path/to/file.py",
+      "line": 123,
+      "severity": "critical" | "high" | "medium" | "low",
+      "message": "..."
+    },
+    {
+      "severity": "medium",
+      "message": "Overall: ..."
+    }
+  ]
+}
+
+If there are no significant issues, return {"findings": []}.`;
+
+function getModel(modelId) {
+  const openaiPrefixes = ['gpt-', 'o1', 'o3', 'o4'];
+  if (openaiPrefixes.some((prefix) => modelId.startsWith(prefix))) {
+    return openai(modelId);
+  }
+  // Gemini ids go to Google; every other id is handed to Anthropic.
+  const provider = modelId.startsWith('gemini-') ? google : anthropic;
+  return provider(modelId);
+}
+
+async function main() {
+  // Clone REPO at BRANCH, run the review agent on /app/pr.diff, print one JSON result to stdout; every path exits 0.
+  const repoName = process.env.REPO;
+  const branchName = process.env.BRANCH || 'main';
+  const githubToken = process.env.GITHUB_TOKEN;
+  // Explicit radix; a non-numeric MAX_TOOL_CALLS falls back to 10 instead of passing NaN to stepCountIs.
+  const maxSteps = Number.parseInt(process.env.MAX_TOOL_CALLS ?? '10', 10) || 10;
+  const modelId = process.env.MODEL || 'gemini-2.5-flash';
+  if (!repoName || !githubToken) {
+    console.log(JSON.stringify({ error: 'Missing required environment variables', findings: [] }));
+    process.exit(0);
+  }
+  // Clone the repository
+  process.stderr.write('Cloning repository...\n');
+  const cloneDir = '/tmp/repo';
+  const cloneUrl = `https://${githubToken}@github.com/${repoName}.git`;
+
+  spawnSync('rm', ['-rf', cloneDir]);
+  const cloneResult = spawnSync(
+    'git',
+    ['clone', '--depth=1', '--branch', branchName, cloneUrl, cloneDir],
+    { encoding: 'utf8', env: { ...process.env, GIT_TERMINAL_PROMPT: '0' } }
+  );
+  // git may echo the clone URL (which embeds the token) in its error text; scrub it before it reaches stdout.
+  if (cloneResult.status !== 0) {
+    console.log(JSON.stringify({ error: `Failed to clone: ${(cloneResult.stderr || 'unknown error').replaceAll(githubToken, '***')}`, findings: [] }));
+    process.exit(0);
+  }
+  process.stderr.write(`Cloned ${repoName} branch ${branchName} (shallow)\n`);
+
+  // Strip token from git config to prevent exfiltration via bash tool
+  spawnSync('git', ['remote', 'set-url', 'origin', `https://github.com/${repoName}.git`], {
+    cwd: cloneDir, encoding: 'utf8',
+  });
+
+  // Read PR diff
+  let prDiff = '';
+  try {
+    prDiff = readFileSync('/app/pr.diff', 'utf8');
+  } catch (e) {
+    process.stderr.write(`Warning: Could not read PR diff (${e.message})\n`);
+  }
+
+  process.stderr.write(`Running agent with ${modelId}...\n`);
+
+  try {
+    let stepCount = 0;
+
+    const { text } = await generateText({
+      model: getModel(modelId),
+      system: SYSTEM_PROMPT,
+      prompt: `Please review this pull request:\n\n${prDiff}`,
+      stopWhen: stepCountIs(maxSteps),
+      tools: {
+        bash: tool({
+          description: 'Run a shell command in the repository root.',
+          inputSchema: z.object({ command: z.string() }),
+          execute: async ({ command }) => {
+            stepCount++;
+            process.stderr.write(`Tool call ${stepCount}: bash(${JSON.stringify({ command })})\n`);
+            return runBash(command);
+          },
+        }),
+      },
+    });
+
+    // Strip markdown fences if present
+    let cleaned = text.trim();
+    if (cleaned.startsWith('```')) {
+      const lines = cleaned.split('\n').slice(1);
+      if (lines.at(-1)?.trim() === '```') lines.pop();
+      cleaned = lines.join('\n').trim();
+    }
+    if (!cleaned) throw new Error('Agent returned no final text (step limit reached before an answer?)');
+    const output = JSON.parse(cleaned);
+    if (!Array.isArray(output.findings)) output.findings = [];
+    output.findings = output.findings.slice(0, 3);
+
+    console.log(JSON.stringify(output));
+    process.exit(0);
+  } catch (e) {
+    console.log(JSON.stringify({ error: `Agent failed: ${e.message}`, findings: [] }));
+    process.exit(0);
+  }
+}
+
+main();
diff --git a/agent.py b/agent.py
new file mode 100644
index 0000000..12cfd8e
--- /dev/null
+++ b/agent.py
@@ -0,0 +1,210 @@
+import os
+import subprocess
+import json
+import sys
+from anthropic import Anthropic
+
+
+def run_bash(command: str, timeout: int = 30) -> str:
+    """Run `command` via the shell in /tmp/repo with a secret-free env; return its output (max 8000 chars) or an ERROR string."""
+    try:
+        # Minimal env — no secrets accessible to shell commands (prevents prompt injection exfiltration)
+        safe_env = {
+            "PATH": os.environ.get("PATH", "/usr/bin:/bin:/usr/local/bin"),
+            "HOME": os.environ.get("HOME", "/root"),
+            "LANG": os.environ.get("LANG", "en_US.UTF-8"),
+        }
+        result = subprocess.run(
+            command, shell=True, capture_output=True,
+            text=True, errors='replace', timeout=timeout, cwd='/tmp/repo',  # undecodable bytes must not fail the call
+            env=safe_env
+        )
+        output = result.stdout
+        if result.stderr:
+            output += f"\n[stderr]: {result.stderr}"
+        if not output.strip():
+            return f"[exit code {result.returncode}, no output]"
+        if len(output) > 8000:
+            output = output[:8000] + f"\n...[truncated, {len(output)} total chars]"
+        return output
+    except subprocess.TimeoutExpired:
+        return f"ERROR: Command timed out after {timeout}s"
+    except Exception as e:
+        return f"ERROR: {str(e)}"
+
+
+TOOLS = [  # Tool schema passed as `tools=` to client.messages.create in run_agent; "bash" is the only tool.
+    {
+        "name": "bash",
+        "description": "Run a shell command in the repository root.",
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "command": {"type": "string", "description": "The shell command to run"}
+            },
+            "required": ["command"]
+        }
+    }
+]
+
+SYSTEM_PROMPT = """You are a senior software engineer reviewing a pull request.
+
+The repository is already cloned and your working directory is the repo root. You have a bash tool with full shell access — use it however you see fit to understand the changes and their impact.
+
+Return at most 3 findings as JSON — no other text. Each finding can be an inline comment (specific file + line) or an overall assessment (no file/line). Use whichever makes more sense for each issue.
+
+{
+  "findings": [
+    {
+      "file": "path/to/file.py",
+      "line": 123,
+      "severity": "critical" | "high" | "medium" | "low",
+      "message": "..."
+    },
+    {
+      "severity": "medium",
+      "message": "Overall: ..."
+    }
+  ]
+}
+
+If there are no significant issues, return {"findings": []}.
+"""
+
+
+def run_agent(client: Anthropic, pr_diff: str, max_tool_calls: int) -> str:
+    """Drive the tool-use loop until the model ends its turn or max_tool_calls is hit; return the final text ("" if none)."""
+    messages = [
+        {
+            "role": "user",
+            "content": f"Please review this pull request:\n\n{pr_diff}"
+        }
+    ]
+    tool_call_count = 0
+
+    while True:
+        response = client.messages.create(
+            model="claude-haiku-4-5-20251001",
+            max_tokens=4096,
+            system=SYSTEM_PROMPT,
+            tools=TOOLS,
+            messages=messages
+        )
+
+        messages.append({"role": "assistant", "content": response.content})
+
+        if response.stop_reason == "end_turn":
+            print(f"end_turn content: {response.content}", file=sys.stderr)
+            for block in response.content:
+                if hasattr(block, 'text') and block.text.strip():
+                    return block.text
+            return ""
+
+        if response.stop_reason == "tool_use":
+            tool_results = []
+            for block in response.content:
+                if block.type == "tool_use":
+                    tool_call_count += 1
+                    print(f"Tool call {tool_call_count}/{max_tool_calls}: {block.name}({json.dumps(block.input)})", file=sys.stderr)
+                    result = run_bash(block.input["command"]) if block.name == "bash" else f"Unknown tool: {block.name}"
+                    tool_results.append({
+                        "type": "tool_result",
+                        "tool_use_id": block.id,
+                        "content": result
+                    })
+
+            messages.append({"role": "user", "content": tool_results})
+
+            if tool_call_count >= max_tool_calls:
+                print(f"Tool call limit ({max_tool_calls}) reached, requesting final answer.", file=sys.stderr)
+                messages.append({"role": "user", "content": "You've used the maximum number of tool calls. Based on everything you've seen, provide your final review as JSON now."})
+                response = client.messages.create(
+                    model="claude-haiku-4-5-20251001", max_tokens=4096, system=SYSTEM_PROMPT,
+                    # The history contains tool_use/tool_result blocks, so `tools` must still be declared;
+                    # tool_choice "none" makes the model answer in text instead of requesting bash again.
+                    tools=TOOLS, tool_choice={"type": "none"}, messages=messages
+                )
+                print(f"Final response stop_reason={response.stop_reason} content={response.content}", file=sys.stderr)
+                for block in response.content:
+                    if hasattr(block, 'text') and block.text.strip():
+                        return block.text
+                return ""
+        else:
+            break
+
+    return ""
+
+
+def main():
+    """Clone REPO at BRANCH, run the review agent on /app/pr.diff and print one JSON result to stdout."""
+    repo_name = os.environ.get('REPO')
+    commit_sha = os.environ.get('COMMIT_SHA')
+    branch_name = os.environ.get('BRANCH', 'main')
+    anthropic_api_key = os.environ.get('ANTHROPIC_API_KEY')
+    github_token = os.environ.get('GITHUB_TOKEN')
+    max_tool_calls = int(os.environ.get('MAX_TOOL_CALLS', '10'))
+    if not all([repo_name, commit_sha, anthropic_api_key, github_token]):
+        print(json.dumps({"error": "Missing required environment variables", "findings": []}))
+        sys.exit(1)
+
+    # Shallow-clone the head of the PR branch; COMMIT_SHA is required above but is not checked out here.
+    print("Cloning repository...", file=sys.stderr)
+    clone_dir = '/tmp/repo'
+    clone_url = f"https://{github_token}@github.com/{repo_name}.git"
+    try:
+        subprocess.run(['rm', '-rf', clone_dir], check=True)
+        subprocess.run(
+            ['git', 'clone', '--depth=1', '--branch', branch_name, clone_url, clone_dir],
+            check=True, capture_output=True,
+            env={**os.environ, 'GIT_TERMINAL_PROMPT': '0'}
+        )
+        print(f"Cloned {repo_name} branch {branch_name} (shallow)", file=sys.stderr)
+    except subprocess.CalledProcessError as e:
+        # str(e) embeds the clone command (token included) and git may echo the URL: scrub before printing.
+        detail = (e.stderr.decode() if e.stderr else str(e)).replace(github_token, '***')
+        print(json.dumps({"error": f"Failed to clone: {detail}", "findings": []}))
+        sys.exit(1)
+
+    os.chdir(clone_dir)
+    # Drop the token from the stored remote URL so the model's bash tool cannot read it from .git/config.
+    subprocess.run(['git', 'remote', 'set-url', 'origin', f"https://github.com/{repo_name}.git"], check=False)
+
+    # Read PR diff from file written by server.py
+    try:
+        with open('/app/pr.diff', 'r', encoding='utf-8') as f:
+            pr_diff = f.read()
+    except Exception as e:
+        print(f"Warning: Could not read PR diff ({e})", file=sys.stderr)
+        pr_diff = ""
+    # Run the agentic review
+    print("Running agent...", file=sys.stderr)
+    client = Anthropic(api_key=anthropic_api_key)
+
+    try:
+        final_response = run_agent(client, pr_diff, max_tool_calls)
+        if not final_response:
+            raise ValueError("No response from agent")
+
+        # Strip markdown fences if present
+        cleaned = final_response.strip()
+        if cleaned.startswith('```'):
+            lines = cleaned.split('\n')[1:]
+            if lines and lines[-1].strip() == '```':
+                lines = lines[:-1]
+            cleaned = '\n'.join(lines).strip()
+
+        output = json.loads(cleaned)
+        if not isinstance(output.get('findings'), list):
+            output['findings'] = []
+        output['findings'] = output['findings'][:3]
+        print(json.dumps(output))
+        sys.exit(0)
+
+    except json.JSONDecodeError as e:
+        print(json.dumps({"error": f"Invalid JSON from agent: {str(e)}", "findings": []}))
+    except Exception as e:
+        print(json.dumps({"error": f"Agent failed: {str(e)}", "findings": []}))
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
index 137a9b2..2cf69ef 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
+e2b>=1.0.0
+anthropic>=0.25.0
 openai
 requests
 python-dotenv
diff --git a/review.py b/review.py
index 50d475f..a7a3b93 100644
--- a/review.py
+++ b/review.py
@@ -40,6 +40,43 @@
 # Helper Functions
 # -------------------------
 
+def clean_and_parse_json(text: str):
+    """Parse an LLM reply as JSON, tolerating a markdown fence or surrounding chatter.
+
+    Args:
+        text: Raw model output.
+
+    Returns:
+        The decoded JSON value. The fence-stripped text is tried first, then the
+        span from the first '{' to the last '}'.
+
+    Raises:
+        ValueError: If the reply is empty or neither attempt yields valid JSON.
+    """
+    if not text:
+        raise ValueError("Empty response")
+
+    body = text.strip()
+    if body.startswith("```"):
+        # Drop the opening fence line, and the closing fence when it is the last line.
+        rows = body.split('\n')[1:]
+        if rows and rows[-1].strip() == "```":
+            rows.pop()
+        body = '\n'.join(rows).strip()
+
+    # Candidates in priority order: the whole body, then the outermost brace span.
+    candidates = [body]
+    first_brace, last_brace = body.find('{'), body.rfind('}')
+    if -1 < first_brace < last_brace:
+        candidates.append(body[first_brace:last_brace + 1])
+
+    for candidate in candidates:
+        try:
+            return json.loads(candidate)
+        except json.JSONDecodeError:
+            continue
+    raise ValueError(f"Could not extract valid JSON from response (first 200 chars): {text[:200]}...")
+
 def should_skip_file(filename):
     """Check if a file should be skipped based on extension or directory."""
     # Check extension
@@ -189,6 +226,7 @@ def process_review(repo: str, pr_number: int, installation_id: int):
   "comments": [
     {{
       "line": <line number>,
+      "severity": "<critical|high|medium|low>",
       "message": "<review comment>"
     }}
   ]
@@ -197,6 +235,8 @@ def process_review(repo: str, pr_number: int, installation_id: int):
 If there are no issues, return:
 
 {{ "comments": [] }}
+
+IMPORTANT: Return ONLY valid JSON. Do not include any other text, explanations, or markdown formatting (such as ```json code fences```).
 """
 
         try:
@@ -210,18 +250,122 @@ def process_review(repo: str, pr_number: int, installation_id: int):
             )
 
             result = response.choices[0].message.content
-            data = json.loads(result)
+            try:
+                data = clean_and_parse_json(result)
+            except Exception as e:
+                print(f"Failed to parse LLM response for {filename}: {e}")
+                continue
 
             for c in data["comments"]:
                 comments.append({
                     "path": filename,
                     "line": c["line"],
+                    "severity": c.get("severity", "medium"),
                     "body": c["message"]
                 })
 
         except Exception as e:
             print(f"LLM review failed for {filename}: {e}")
 
+    # Second pass: Consolidate and prioritize all comments (at most MAX_COMMENTS_PER_PR in total)
+    all_file_comments = comments  # Save all collected comments
+    print(f"\nCollected {len(all_file_comments)} potential issues. Consolidating to max {MAX_COMMENTS_PER_PR}...")
+
+    comments = []
+    if all_file_comments:
+        # Build a summary of all issues for prioritization
+        issues_summary = ""
+        for idx, comment in enumerate(all_file_comments, 1):
+            issues_summary += f"{idx}. [{comment['severity'].upper()}] {comment['path']}:{comment['line']} - {comment['body'][:100]}...\n"
+
+        consolidation_prompt = f"""
+You are a senior software engineer conducting a final PR review.
+
+The following potential issues were identified across all files:
+
+{issues_summary}
+
+Select at most {MAX_COMMENTS_PER_PR} of the MOST CRITICAL issues to actually comment on.
+
+IMPORTANT RULES:
+- Prioritize critical and high severity issues first
+- Skip low-severity issues (style, nitpicks) unless there's something really important
+- If multiple issues are related, consider if they can be combined into a single comment
+- Be conservative - it's better to have fewer, more impactful comments than many minor ones
+- **You MUST include at least one comment (either a selected issue or a summary comment), even if the code looks perfect.** If no significant issues are found, provide a positive or neutral summary comment like "No significant issues found. The changes look good."
+
+Return JSON in this format:
+
+{{
+  "selected_issues": [
+    {{
+      "original_index": <index from list above>,
+      "comment": {{
+        "line": <line number>,
+        "message": "<final review comment - refine if needed for clarity>"
+      }}
+    }}
+  ],
+  "summary_comment": "<if you want to add general feedback instead of or in addition to specific line comments. This can be used to satisfy the 'at least one comment' rule when there are no issues.>"
+}}
+
+Do NOT return empty strings. Ensure either selected_issues contains at least one item OR summary_comment is non-empty.
+
+IMPORTANT: Return ONLY valid JSON. Do not include any other text, explanations, or markdown formatting.
+"""
+
+        try:
+            response = client.chat.completions.create(
+                model="gpt-4.1-mini",
+                messages=[
+                    {"role": "system", "content": "You are a senior software engineer with extreme technical expertise."},
+                    {"role": "user", "content": consolidation_prompt}
+                ],
+                temperature=0
+            )
+
+            result = response.choices[0].message.content
+            try:
+                data = clean_and_parse_json(result)
+            except Exception as e:
+                raise ValueError(f"Consolidation JSON parsing failed: {e}")
+
+            # Add selected specific comments
+            for selected in data["selected_issues"][:MAX_COMMENTS_PER_PR]:
+                idx = selected["original_index"] - 1
+                if 0 <= idx < len(all_file_comments):
+                    original = all_file_comments[idx]
+                    comments.append({
+                        "path": original["path"],
+                        "line": selected["comment"]["line"],
+                        "body": selected["comment"]["message"]
+                    })
+
+            # Optionally add summary comment if present and we have room
+            if data.get("summary_comment") and len(comments) < MAX_COMMENTS_PER_PR:
+                # Summary comments are posted as general PR comments (no line number)
+                comments.append({
+                    "path": None,  # Indicates general PR comment
+                    "line": None,
+                    "body": data["summary_comment"]
+                })
+
+        except Exception as e:
+            print(f"Consolidation failed, falling back to all comments: {e}")
+            # Fallback: just use all comments but respect the limit
+            comments = [
+                {"path": c["path"], "line": c["line"], "body": c["body"]}
+                for c in all_file_comments[:MAX_COMMENTS_PER_PR]
+            ]
+
+    # Ensure at least one comment is posted, even if no issues were found
+    if len(comments) == 0:
+        comments.append({
+            "path": None,
+            "line": None,
+            "body": "Review complete: No significant issues found. The changes look good."
+        })
+
     # -------------------------
     # Get commit SHA
     # -------------------------
diff --git a/server.py b/server.py
index 4d85de4..698cf17 100644
--- a/server.py
+++ b/server.py
@@ -1,14 +1,19 @@
-from fastapi import FastAPI, Request, HTTPException, BackgroundTasks
+from fastapi import FastAPI, Request, HTTPException
 import hmac
 import hashlib
 import os
 import json
+import threading
+import requests
+from e2b import Sandbox
 from github_app import GitHubAppAuth
-from review import process_review
 
 app = FastAPI(title="PR Review Bot")
 WEBHOOK_SECRET = os.environ.get('GITHUB_WEBHOOK_SECRET')
 
+# Semaphore to limit concurrent E2B sandboxes (max 3)
+e2b_semaphore = threading.Semaphore(3)
+
 def verify_signature(request_body: bytes, signature: str) -> bool:
     """Verify webhook signature from GitHub."""
     if not signature or not WEBHOOK_SECRET:
@@ -22,8 +27,213 @@ def verify_signature(request_body: bytes, signature: str) -> bool:
 
     return hmac.compare_digest(signature, expected)
 
+def run_review_in_e2b(repo: str, pr_number: int, installation_id: int):
+    """
+    Review a PR inside an E2B sandbox (running agent.mjs) and post up to 3 findings as GitHub comments.
+    Waits up to 60s for a sandbox slot; failures are printed to the log rather than raised to the caller.
+    """
+    print(f"[E2B] Starting review for PR #{pr_number} in {repo}")
+
+    # Acquire semaphore to limit concurrent sandboxes
+    acquired = False
+    try:
+        acquired = e2b_semaphore.acquire(blocking=True, timeout=60)
+        if not acquired:
+            print("[E2B] Timeout acquiring semaphore, skipping review")
+            return
+    except Exception as e:
+        print(f"[E2B] Semaphore error: {e}")
+        return
+
+    sandbox = None
+    stdout_chunks = []
+    try:
+        # Get GitHub App auth token
+        auth = GitHubAppAuth(
+            app_id=os.environ["GITHUB_APP_ID"],
+            private_key_path=os.environ["GITHUB_PRIVATE_KEY_PATH"],
+            installation_id=installation_id
+        )
+        token = auth.get_installation_token()
+
+        # Setup headers with installation token
+        headers = {
+            "Authorization": f"token {token}",
+            "Accept": "application/vnd.github+json"
+        }
+
+        # Fetch PR data to get diff and commit SHA
+        pr_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}"
+        try:
+            pr_response = requests.get(pr_url, headers=headers, timeout=30)
+            pr_response.raise_for_status()
+            pr_data = pr_response.json()
+            commit_sha = pr_data["head"]["sha"]
+            branch_name = pr_data["head"]["ref"]
+            head_repo = pr_data["head"]["repo"]["full_name"]
+        except Exception as e:
+            print(f"[E2B] Failed to fetch PR data: {e}")
+            return
+
+        # Get the PR's changed files (first page only, up to 100) and join their patches into a diff-like text
+        diff_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/files"
+        try:
+            diff_response = requests.get(diff_url, headers=headers, params={"per_page": 100}, timeout=30)
+            diff_response.raise_for_status()
+            files_data = diff_response.json()
+
+            # Build unified diff from files data
+            diff_parts = []
+            for file in files_data:
+                if 'patch' in file:
+                    diff_parts.append(f"--- a/{file['filename']}")
+                    diff_parts.append(f"+++ b/{file['filename']}")
+                    diff_parts.append(file['patch'])
+            pr_diff = '\n'.join(diff_parts)
+        except Exception as e:
+            print(f"[E2B] Failed to fetch PR diff: {e}")
+            return
+
+        # Create E2B sandbox; its lifetime must cover npm install (<=120s) plus the agent run (<=290s)
+        print("[E2B] Creating sandbox...")
+        if not os.environ.get("E2B_API_KEY"):
+            raise ValueError("E2B_API_KEY not set")
+
+        sandbox = Sandbox.create(template="claude", timeout=480)
+        print("[E2B] Sandbox created")
+
+        # Upload agent.mjs to sandbox
+        with open('/app/agent.mjs', 'r') as f:
+            agent_code = f.read()
+        sandbox.files.write('/app/agent.mjs', agent_code)
+        print("[E2B] agent.mjs uploaded")
+
+        # Write PR diff to a file to avoid env var size limits for large PRs
+        sandbox.files.write('/app/pr.diff', pr_diff)
+        print("[E2B] PR diff written to /app/pr.diff")
+
+        # Write package.json and install Node dependencies
+        print("[E2B] Installing Node.js dependencies...")
+        package_json = '{"type":"module","dependencies":{"ai":"^6.0.0","@ai-sdk/anthropic":"^3.0.0","@ai-sdk/openai":"^3.0.0","@ai-sdk/google":"^3.0.0","zod":"^3.23.0"}}'
+        sandbox.files.write('/app/package.json', package_json)
+        sandbox.commands.run("cd /app && npm install -q", timeout=120)
+        print("[E2B] Node.js dependencies installed")
+
+        # Run the agent with env vars passed directly to the command
+        print("[E2B] Starting agent process...")
+        agent_envs = {
+            'REPO': head_repo,
+            'COMMIT_SHA': commit_sha,
+            'BRANCH': branch_name,
+            'GITHUB_TOKEN': token,
+            'ANTHROPIC_API_KEY': os.environ.get("ANTHROPIC_API_KEY", ""),
+            'OPENAI_API_KEY': os.environ.get("OPENAI_API_KEY", ""),
+            'GOOGLE_GENERATIVE_AI_API_KEY': os.environ.get("GOOGLE_GENERATIVE_AI_API_KEY", ""),
+            'MAX_TOOL_CALLS': os.environ.get("MAX_TOOL_CALLS", "10"),
+            'MODEL': os.environ.get("MODEL", "claude-haiku-4-5-20251001"),
+        }
+
+        stdout_chunks = []
+        sandbox.commands.run(
+            "node /app/agent.mjs",
+            envs=agent_envs,
+            timeout=290,
+            on_stdout=lambda data: stdout_chunks.append(data),
+            on_stderr=lambda data: print(f"[Agent] {data}", end='', flush=True)
+        )
+        stdout = ''.join(stdout_chunks)
+
+        print("[E2B] Agent process completed")
+
+        # Parse the JSON output
+        print(f"[E2B] Agent stdout: {stdout[:500]}")
+        try:
+            output = json.loads(stdout)
+            findings = output.get('findings', [])
+
+            # Cap at 3 findings
+            findings = findings[:3]
+            print(f"[E2B] Review completed: {len(findings)} findings")
+
+            # Build set of files in this PR for validation of inline comments
+            pr_files = {f['filename'] for f in files_data}
+
+            for finding in findings:
+                file_path = finding.get('file')
+                line = finding.get('line')
+                severity = str(finding.get('severity') or 'medium')  # model may emit null or a non-string here
+                message = finding.get('message', '')
+
+                if not message:
+                    continue
+
+                comment_body = f"**[{severity.upper()}]** {message}"
+
+                if file_path and line:
+                    # Inline comment — validate file is in the PR diff
+                    if file_path not in pr_files:
+                        print(f"[E2B] Skipping inline finding: {file_path} not in PR diff")
+                        continue
+                    comment_url = f"https://api.github.com/repos/{repo}/pulls/{pr_number}/comments"
+                    payload = {
+                        "body": comment_body,
+                        "commit_id": commit_sha,
+                        "path": file_path,
+                        "line": line
+                    }
+                    label = f"{file_path}:{line}"
+                else:
+                    # Overall assessment — post as a general PR comment
+                    comment_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
+                    payload = {"body": comment_body}
+                    label = "overall"
+
+                try:
+                    response = requests.post(comment_url, headers=headers, json=payload, timeout=30)
+                    response.raise_for_status()
+                    print(f"[E2B] Posted comment ({label})")
+                except requests.RequestException as e:  # also timeouts/connection errors, so one failure does not drop the rest
+                    if getattr(e.response, "status_code", None) == 422 and file_path and line:
+                        # Line not in diff — fall back to general issue comment
+                        print(f"[E2B] Inline comment rejected (line not in diff), posting as issue comment ({label})")
+                        fallback_url = f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments"
+                        fallback_payload = {"body": f"**[{severity.upper()}]** `{file_path}` (line {line}): {message}"}
+                        try:
+                            requests.post(fallback_url, headers=headers, json=fallback_payload, timeout=30).raise_for_status()
+                            print(f"[E2B] Posted fallback comment ({label})")
+                        except Exception as e2:
+                            print(f"[E2B] Failed to post fallback comment: {e2}")
+                    else:
+                        print(f"[E2B] Failed to post comment ({label}): {e}")
+
+            if not findings:
+                print("[E2B] No findings, skipping comment")
+
+        except json.JSONDecodeError as e:
+            print(f"[E2B] Failed to parse agent output: {e}")
+            print(f"[E2B] Agent output (first 500 chars): {stdout[:500]}")
+
+    except Exception as e:
+        print(f"[E2B] Error during review: {e}")
+        if stdout_chunks:
+            print(f"[E2B] Agent stdout: {''.join(stdout_chunks)}")
+
+    finally:
+        # Ensure sandbox is closed
+        if sandbox:
+            try:
+                sandbox.kill()
+                print("[E2B] Sandbox killed")
+            except Exception as e:
+                print(f"[E2B] Error killing sandbox: {e}")
+
+        # Release semaphore
+        if acquired:
+            e2b_semaphore.release()
+        print("[E2B] Review task completed")
+
 @app.post('/webhook')
-async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
+async def webhook_handler(request: Request):
     # Verify signature
     signature = request.headers.get('X-Hub-Signature-256')
     body = await request.body()
@@ -33,20 +243,26 @@ async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
 
     event = json.loads(body)
     action = event.get('action')
+    event_type = request.headers.get('X-GitHub-Event', 'unknown')
+    print(f"[Webhook] event={event_type} action={action}")
 
     # Only process PR open/update events
     if action in ['opened', 'reopened', 'synchronize']:
-        pr_number = event['pull_request']['number']
-        repo = event['pull_request']['base']['repo']['full_name']
-        installation_id = event['installation']['id']
-
-        # Run review in background (don't block webhook response)
-        background_tasks.add_task(
-            process_review,
-            repo=repo,
-            pr_number=pr_number,
-            installation_id=installation_id
+        try:
+            pr_number = event['pull_request']['number']
+            repo = event['pull_request']['base']['repo']['full_name']
+            installation_id = event['installation']['id']
+        except KeyError as e:
+            print(f"[Webhook] Missing key in payload: {e} — keys: {list(event.keys())}")
+            return {'status': 'ok'}
+
+        print(f"[Webhook] Scheduling review for PR #{pr_number} in {repo} (installation={installation_id})", flush=True)
+        thread = threading.Thread(
+            target=run_review_in_e2b,
+            kwargs={'repo': repo, 'pr_number': pr_number, 'installation_id': installation_id},
+            daemon=True
         )
+        thread.start()
 
     # Respond quickly (GitHub expects <30 sec)
     return {'status': 'ok'}