diff --git a/sandboxes/providers/e2b.py b/sandboxes/providers/e2b.py
index f489cb6..53190d1 100644
--- a/sandboxes/providers/e2b.py
+++ b/sandboxes/providers/e2b.py
@@ -27,9 +27,18 @@
     E2B_AVAILABLE = True
 except ImportError:
     E2B_AVAILABLE = False
-    E2BSandbox = None  # Define as None when not available
+    E2BSandbox = None
     logger.warning("E2B SDK not available - install with: uv add e2b")
 
+try:
+    from e2b_code_interpreter import AsyncSandbox as E2BCodeInterpreter
+
+    E2B_CODE_INTERPRETER_AVAILABLE = True
+except ImportError:
+    E2B_CODE_INTERPRETER_AVAILABLE = False
+    E2BCodeInterpreter = None
+    logger.warning("e2b_code_interpreter not available - install with: uv add e2b-code-interpreter")
+
 
 class E2BProvider(SandboxProvider):
     """E2B sandbox provider using the official SDK."""
@@ -572,6 +581,105 @@ async def cleanup_idle_sandboxes(self, idle_timeout: int = 600):
             logger.info(f"Cleaning up idle sandbox {sandbox_id}")
             await self.destroy_sandbox(sandbox_id)
 
+    async def run_code(
+        self,
+        sandbox_id: str,
+        code: str,
+        language: str | None = None,
+        timeout: float | None = None,
+        request_timeout: float | None = None,
+        env_vars: dict[str, str] | None = None,
+        on_stdout=None,
+        on_stderr=None,
+        on_result=None,
+        on_error=None,
+    ) -> dict[str, Any]:
+        """
+        Execute code with rich output capture (plots, DataFrames, etc.).
+
+        Uses the e2b_code_interpreter SDK to run code via the Jupyter kernel
+        inside the sandbox, capturing rich MIME outputs (text, png, html, etc.)
+        alongside stdout/stderr.
+
+        The sandbox must have been created with template="code-interpreter-v1"
+        (which runs a Jupyter kernel). Sandboxes using "base" template will fail.
+
+        Args:
+            sandbox_id: ID of an existing sandbox
+            code: Code to execute
+            language: Kernel language (default: "python"). Accepts any language
+                supported by the sandbox's Jupyter kernels.
+            timeout: Code execution timeout in seconds (default: provider timeout)
+            request_timeout: HTTP request timeout in seconds
+            env_vars: Environment variables to set for this execution
+            on_stdout: Callback(OutputMessage) for real-time stdout lines
+            on_stderr: Callback(OutputMessage) for real-time stderr lines
+            on_result: Callback(Result) for each rich result object
+            on_error: Callback(ExecutionError) on execution error
+
+        Returns:
+            dict with keys: success, stdout, stderr, exit_code, execution_time,
+            rich_outputs (list of {type, data, metadata} dicts)
+
+        Raises:
+            ProviderError: if e2b_code_interpreter is not installed
+            SandboxNotFoundError: if sandbox_id is unknown
+            SandboxError: on execution failure
+        """
+        if not E2B_CODE_INTERPRETER_AVAILABLE:
+            raise ProviderError(
+                "e2b_code_interpreter not installed — install with: uv add e2b-code-interpreter"
+            )
+        if sandbox_id not in self._sandboxes:
+            raise SandboxNotFoundError(f"Sandbox {sandbox_id} not found")
+
+        try:
+            self._sandboxes[sandbox_id]["last_accessed"] = time.time()
+            start_time = time.time()
+
+            # Connect code-interpreter client to the existing running sandbox
+            interpreter = await E2BCodeInterpreter.connect(sandbox_id, api_key=self.api_key)
+
+            execution = await interpreter.run_code(
+                code=code,
+                language=language,
+                envs=env_vars,
+                timeout=float(timeout) if timeout is not None else float(self.timeout),
+                request_timeout=request_timeout,
+                on_stdout=on_stdout,
+                on_stderr=on_stderr,
+                on_result=on_result,
+                on_error=on_error,
+            )
+
+            # Map Result objects → [{type, data, metadata}] (mirrors hopx shape)
+            rich_outputs = []
+            for result in execution.results:
+                extra = result.extra or {}
+                for fmt in result.formats():
+                    # formats() may list names that are not attributes of the
+                    # Result (e.g. keys that only exist in result.extra), so
+                    # fall back to the extra mapping instead of raising.
+                    data = getattr(result, fmt, None)
+                    if data is None:
+                        data = extra.get(fmt)
+                    rich_outputs.append({"type": fmt, "data": data, "metadata": extra})
+
+            return {
+                "success": execution.error is None,
+                "stdout": "\n".join(execution.logs.stdout),
+                "stderr": "\n".join(execution.logs.stderr),
+                "exit_code": 0 if execution.error is None else 1,
+                "execution_time": time.time() - start_time,
+                "rich_outputs": rich_outputs,
+            }
+
+        except (ProviderError, SandboxNotFoundError):
+            raise
+        except Exception as e:
+            logger.error(f"Failed to execute code in sandbox {sandbox_id}: {e}")
+            raise SandboxError(f"Failed to execute code: {e}") from e
+
     def __del__(self):
         """Cleanup on deletion."""
         # Shutdown thread pool
diff --git a/tests/test_e2b_provider_regressions.py b/tests/test_e2b_provider_regressions.py
index 9615985..9026c90 100644
--- a/tests/test_e2b_provider_regressions.py
+++ b/tests/test_e2b_provider_regressions.py
@@ -1,10 +1,12 @@
 """Regression tests for E2B provider behaviors."""
 
 from datetime import datetime
+from unittest.mock import AsyncMock, MagicMock
 
 import pytest
 
 import sandboxes.providers.e2b as e2b_module
+from sandboxes.exceptions import ProviderError, SandboxNotFoundError
 from sandboxes.providers.e2b import E2BProvider
 
 
@@ -81,3 +83,100 @@ async def create(template=None, envs=None, api_key=None, timeout=None):  # noqa:
 
     assert sandbox.sandbox_id == "sb-retry"
     assert calls["count"] == 2
+
+
+@pytest.mark.asyncio
+async def test_run_code_returns_rich_outputs(monkeypatch):
+    """run_code maps Execution results to the hopx-compatible dict shape."""
+    from e2b_code_interpreter.models import Execution, Logs, Result
+
+    sandbox_id = "sb-code-123"
+    provider = E2BProvider(api_key="test-key")
+    provider._sandboxes[sandbox_id] = {"last_accessed": 0.0}
+
+    # Build a mock Execution with one PNG result and stdout
+    png_result = Result(png="iVBORw0KGgo=", text="<Figure size 640x480 with 1 Axes>", is_main_result=True)
+    execution = Execution(
+        results=[png_result],
+        logs=Logs(stdout=["Plot created"], stderr=[]),
+        error=None,
+    )
+
+    mock_interpreter = MagicMock()
+    mock_interpreter.run_code = AsyncMock(return_value=execution)
+
+    mock_connect = AsyncMock(return_value=mock_interpreter)
+    monkeypatch.setattr(e2b_module, "E2BCodeInterpreter", MagicMock(connect=mock_connect))
+    monkeypatch.setattr(e2b_module, "E2B_CODE_INTERPRETER_AVAILABLE", True)
+
+    result = await provider.run_code(sandbox_id, code="import matplotlib")
+
+    assert result["success"] is True
+    assert result["stdout"] == "Plot created"
+    assert result["stderr"] == ""
+    assert result["exit_code"] == 0
+    assert result["execution_time"] >= 0.0
+
+    types = [o["type"] for o in result["rich_outputs"]]
+    assert "png" in types
+    assert "text" in types
+    png_entry = next(o for o in result["rich_outputs"] if o["type"] == "png")
+    assert png_entry["data"] == "iVBORw0KGgo="
+
+    mock_connect.assert_awaited_once_with(sandbox_id, api_key="test-key")
+
+
+@pytest.mark.asyncio
+async def test_run_code_raises_when_not_available(monkeypatch):
+    """run_code raises ProviderError when e2b_code_interpreter is not installed."""
+    monkeypatch.setattr(e2b_module, "E2B_CODE_INTERPRETER_AVAILABLE", False)
+
+    provider = E2BProvider(api_key="test-key")
+    provider._sandboxes["sb-x"] = {"last_accessed": 0.0}
+
+    with pytest.raises(ProviderError, match="e2b_code_interpreter not installed"):
+        await provider.run_code("sb-x", code="print(1)")
+
+
+@pytest.mark.asyncio
+async def test_run_code_raises_for_unknown_sandbox(monkeypatch):
+    """run_code raises SandboxNotFoundError for an untracked sandbox_id."""
+    monkeypatch.setattr(e2b_module, "E2B_CODE_INTERPRETER_AVAILABLE", True)
+
+    provider = E2BProvider(api_key="test-key")
+
+    with pytest.raises(SandboxNotFoundError):
+        await provider.run_code("sb-does-not-exist", code="print(1)")
+
+
+@pytest.mark.asyncio
+async def test_run_code_reads_extra_only_formats(monkeypatch):
+    """run_code takes data from Result.extra for formats that are not attributes."""
+    from types import SimpleNamespace
+
+    sandbox_id = "sb-extra-1"
+    provider = E2BProvider(api_key="test-key")
+    provider._sandboxes[sandbox_id] = {"last_accessed": 0.0}
+
+    extra = {"application/x-custom": "payload"}
+    stub_result = SimpleNamespace(
+        text="hello",
+        extra=extra,
+        formats=lambda: ["text", "application/x-custom"],
+    )
+    execution = SimpleNamespace(
+        results=[stub_result],
+        logs=SimpleNamespace(stdout=[], stderr=[]),
+        error=None,
+    )
+
+    mock_interpreter = MagicMock()
+    mock_interpreter.run_code = AsyncMock(return_value=execution)
+    mock_connect = AsyncMock(return_value=mock_interpreter)
+    monkeypatch.setattr(e2b_module, "E2BCodeInterpreter", MagicMock(connect=mock_connect))
+    monkeypatch.setattr(e2b_module, "E2B_CODE_INTERPRETER_AVAILABLE", True)
+
+    result = await provider.run_code(sandbox_id, code="display(obj)")
+
+    by_type = {o["type"]: o["data"] for o in result["rich_outputs"]}
+    assert by_type == {"text": "hello", "application/x-custom": "payload"}