Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 108 additions & 1 deletion sandboxes/providers/e2b.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,18 @@
E2B_AVAILABLE = True
except ImportError:
E2B_AVAILABLE = False
E2BSandbox = None # Define as None when not available
E2BSandbox = None
logger.warning("E2B SDK not available - install with: uv add e2b")

try:
from e2b_code_interpreter import AsyncSandbox as E2BCodeInterpreter

E2B_CODE_INTERPRETER_AVAILABLE = True
except ImportError:
E2B_CODE_INTERPRETER_AVAILABLE = False
E2BCodeInterpreter = None
logger.warning("e2b_code_interpreter not available - install with: uv add e2b-code-interpreter")


class E2BProvider(SandboxProvider):
"""E2B sandbox provider using the official SDK."""
Expand Down Expand Up @@ -572,6 +581,104 @@ async def cleanup_idle_sandboxes(self, idle_timeout: int = 600):
logger.info(f"Cleaning up idle sandbox {sandbox_id}")
await self.destroy_sandbox(sandbox_id)

async def run_code(
    self,
    sandbox_id: str,
    code: str,
    language: str | None = None,
    timeout: float | None = None,
    request_timeout: float | None = None,
    env_vars: dict[str, str] | None = None,
    on_stdout=None,
    on_stderr=None,
    on_result=None,
    on_error=None,
) -> dict[str, Any]:
    """
    Execute code with rich output capture (plots, DataFrames, etc.).

    Uses the e2b_code_interpreter SDK to run code via the Jupyter kernel
    inside the sandbox, capturing rich MIME outputs (text, png, html, etc.)
    alongside stdout/stderr.

    The sandbox must have been created with template="code-interpreter-v1"
    (which runs a Jupyter kernel). Sandboxes using "base" template will fail.

    Args:
        sandbox_id: ID of an existing sandbox tracked by this provider
        code: Code to execute
        language: Kernel language. Passed through unchanged; when None the
            SDK picks its own default (documented by E2B as "python").
        timeout: Code execution timeout in seconds (default: provider timeout)
        request_timeout: HTTP request timeout in seconds
        env_vars: Environment variables to set for this execution
        on_stdout: Callback(OutputMessage) for real-time stdout lines
        on_stderr: Callback(OutputMessage) for real-time stderr lines
        on_result: Callback(Result) for each rich result object
        on_error: Callback(ExecutionError) on execution error

    Returns:
        dict with keys:
            success: True when the execution reported no error
            stdout / stderr: captured log chunks joined with newlines
            exit_code: synthesised, 0 on success and 1 on error
            execution_time: wall-clock seconds, including the time spent
                connecting to the sandbox
            rich_outputs: list of {type, data, metadata} dicts, one per
                format of every result
            error: None on success, otherwise a {name, value, traceback}
                dict describing the execution error

    Raises:
        ProviderError: if e2b_code_interpreter is not installed
        SandboxNotFoundError: if sandbox_id is unknown
        SandboxError: on execution failure
    """
    if not E2B_CODE_INTERPRETER_AVAILABLE:
        raise ProviderError(
            "e2b_code_interpreter not installed — install with: uv add e2b-code-interpreter"
        )
    if sandbox_id not in self._sandboxes:
        raise SandboxNotFoundError(f"Sandbox {sandbox_id} not found")

    try:
        # Record the access so idle-cleanup does not reap a sandbox in use.
        self._sandboxes[sandbox_id]["last_accessed"] = time.time()
        start_time = time.time()

        # Connect code-interpreter client to the existing running sandbox
        interpreter = await E2BCodeInterpreter.connect(sandbox_id, api_key=self.api_key)

        execution = await interpreter.run_code(
            code=code,
            language=language,
            envs=env_vars,
            timeout=float(timeout) if timeout is not None else float(self.timeout),
            request_timeout=request_timeout,
            on_stdout=on_stdout,
            on_stderr=on_stderr,
            on_result=on_result,
            on_error=on_error,
        )

        # Map Result objects → [{type, data, metadata}] (mirrors hopx shape)
        rich_outputs = []
        for result in execution.results:
            extra = result.extra or {}
            for fmt in result.formats():
                # formats() also reports custom MIME types that are stored in
                # result.extra; those are not attributes of Result, so read
                # them from the extra mapping instead of via result[fmt].
                data = extra[fmt] if fmt in extra else result[fmt]
                rich_outputs.append(
                    {
                        "type": fmt,
                        "data": data,
                        "metadata": result.extra or {},
                    }
                )

        # Surface the failure reason in the return value; otherwise callers
        # that did not pass on_error only see success=False with no details.
        failure = execution.error
        error_info = None
        if failure is not None:
            error_info = {
                "name": getattr(failure, "name", None),
                "value": getattr(failure, "value", None),
                "traceback": getattr(failure, "traceback", None),
            }

        return {
            "success": failure is None,
            "stdout": "\n".join(execution.logs.stdout),
            "stderr": "\n".join(execution.logs.stderr),
            "exit_code": 0 if failure is None else 1,
            "execution_time": time.time() - start_time,
            "rich_outputs": rich_outputs,
            "error": error_info,
        }

    except (ProviderError, SandboxNotFoundError):
        # Provider-level errors keep their type instead of being re-wrapped.
        raise
    except Exception as e:
        logger.error("Failed to execute code in sandbox %s: %s", sandbox_id, e)
        raise SandboxError(f"Failed to execute code: {e}") from e

def __del__(self):
"""Cleanup on deletion."""
# Shutdown thread pool
Expand Down
66 changes: 66 additions & 0 deletions tests/test_e2b_provider_regressions.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""Regression tests for E2B provider behaviors."""

from datetime import datetime
from unittest.mock import AsyncMock, MagicMock

import pytest

import sandboxes.providers.e2b as e2b_module
from sandboxes.exceptions import ProviderError, SandboxNotFoundError
from sandboxes.providers.e2b import E2BProvider


Expand Down Expand Up @@ -81,3 +83,67 @@ async def create(template=None, envs=None, api_key=None, timeout=None): # noqa:

assert sandbox.sandbox_id == "sb-retry"
assert calls["count"] == 2


@pytest.mark.asyncio
async def test_run_code_returns_rich_outputs(monkeypatch):
    """run_code maps Execution results to the hopx-compatible dict shape.

    Skipped (rather than errored) when the optional e2b_code_interpreter
    package is not installed, since the provider treats it as optional.
    """
    models = pytest.importorskip("e2b_code_interpreter.models")

    sandbox_id = "sb-code-123"
    provider = E2BProvider(api_key="test-key")
    provider._sandboxes[sandbox_id] = {"last_accessed": 0.0}

    # Build a real Execution with one PNG result and a single stdout chunk
    png_result = models.Result(png="iVBORw0KGgo=", text="<Figure>", is_main_result=True)
    execution = models.Execution(
        results=[png_result],
        logs=models.Logs(stdout=["Plot created"], stderr=[]),
        error=None,
    )

    # Stand-in for the connected code-interpreter client
    mock_interpreter = MagicMock()
    mock_interpreter.run_code = AsyncMock(return_value=execution)

    mock_connect = AsyncMock(return_value=mock_interpreter)
    monkeypatch.setattr(e2b_module, "E2BCodeInterpreter", MagicMock(connect=mock_connect))
    monkeypatch.setattr(e2b_module, "E2B_CODE_INTERPRETER_AVAILABLE", True)

    result = await provider.run_code(sandbox_id, code="import matplotlib")

    assert result["success"] is True
    assert result["stdout"] == "Plot created"
    assert result["stderr"] == ""
    assert result["exit_code"] == 0
    assert result["execution_time"] >= 0.0

    types = [o["type"] for o in result["rich_outputs"]]
    assert "png" in types
    assert "text" in types
    png_entry = next(o for o in result["rich_outputs"] if o["type"] == "png")
    assert png_entry["data"] == "iVBORw0KGgo="

    # The provider must attach to the existing sandbox and execute exactly once
    mock_connect.assert_awaited_once_with(sandbox_id, api_key="test-key")
    mock_interpreter.run_code.assert_awaited_once()

    # run_code refreshes the access timestamp used for idle tracking
    assert provider._sandboxes[sandbox_id]["last_accessed"] > 0.0


@pytest.mark.asyncio
async def test_run_code_raises_when_not_available(monkeypatch):
    """A missing e2b_code_interpreter package makes run_code raise ProviderError."""
    # Simulate the optional dependency being absent at import time.
    monkeypatch.setattr(e2b_module, "E2B_CODE_INTERPRETER_AVAILABLE", False)

    tracked_id = "sb-x"
    e2b = E2BProvider(api_key="test-key")
    # Keep the sandbox tracked so an unknown-id error cannot mask the result.
    e2b._sandboxes[tracked_id] = {"last_accessed": 0.0}

    expected_message = "e2b_code_interpreter not installed"
    with pytest.raises(ProviderError, match=expected_message):
        await e2b.run_code(tracked_id, code="print(1)")


@pytest.mark.asyncio
async def test_run_code_raises_for_unknown_sandbox(monkeypatch):
    """An id the provider is not tracking makes run_code raise SandboxNotFoundError."""
    # Pretend the interpreter SDK is installed so the lookup check is reached.
    monkeypatch.setattr(e2b_module, "E2B_CODE_INTERPRETER_AVAILABLE", True)

    # A fresh provider in which this id was never registered.
    e2b = E2BProvider(api_key="test-key")
    missing_id = "sb-does-not-exist"

    with pytest.raises(SandboxNotFoundError):
        await e2b.run_code(missing_id, code="print(1)")