diff --git a/src/octopal/runtime/octo/router.py b/src/octopal/runtime/octo/router.py index e4af93dd..eb2487f7 100644 --- a/src/octopal/runtime/octo/router.py +++ b/src/octopal/runtime/octo/router.py @@ -1352,9 +1352,9 @@ async def route_worker_results_back_to_octo( "- Do not start, stop, schedule, or orchestrate workers from this path.\n" "- Do not invent follow-up tool needs beyond the tools already exposed here.\n\n" "If a worker was asked to write or save its result to a workspace path and it returned " - "the content instead, use `fs_write` to persist the content to the exact requested " - "workspace-relative path before deciding whether the user needs an update. Use `fs_write` " - "for ordinary workspace artifacts, generated reports, research notes, and draft files. " + "the content instead, use `fs_write` only when that requested path is under " + "`reports/` or `artifacts/`. Do not write worker-returned content to any other " + "workspace path. " "Use `manage_canon` only for durable canonical knowledge in the supported canon files.\n\n" "If any payload output is truncated and a payload includes `worker_id`, you may use " "`get_worker_output_path` for a specific dotted path lookup.\n" @@ -1764,8 +1764,12 @@ def _get_worker_followup_tools(octo: Any, chat_id: int) -> tuple[list[ToolSpec], "skill_exec": True, "skill_manage": True, } + workspace_root = Path(os.getenv("OCTOPAL_WORKSPACE_DIR", "workspace")).resolve() ctx = { - "base_dir": Path(os.getenv("OCTOPAL_WORKSPACE_DIR", "workspace")).resolve(), + "base_dir": workspace_root, + "workspace_root": workspace_root, + "allowed_paths": list(_DURABLE_WORKSPACE_ROOTS), + "restrict_to_allowed_paths": True, "octo": octo, "chat_id": chat_id, } diff --git a/src/octopal/tools/connectors/drive.py b/src/octopal/tools/connectors/drive.py index 2961050e..96436bc2 100644 --- a/src/octopal/tools/connectors/drive.py +++ b/src/octopal/tools/connectors/drive.py @@ -156,13 +156,14 @@ async def drive_download_to_workspace( if not isinstance(payload, dict) or payload.get("ok") is False: return payload - workspace_root, worker_dir, allowed_paths = _get_paths(ctx) + paths = _get_paths(ctx) try: target = _resolve_tool_path( path, - workspace_root=workspace_root, - worker_dir=worker_dir, - allowed_paths=allowed_paths, + workspace_root=paths.workspace_root, + worker_dir=paths.worker_dir, + allowed_paths=paths.allowed_paths, + restrict_to_allowed_paths=paths.restrict_to_allowed_paths, ) except Exception as exc: return {"ok": False, "error": str(exc)} @@ -172,9 +173,9 @@ async def drive_download_to_workspace( target.write_bytes(raw_content) try: - relative_path = str(target.relative_to(workspace_root)) + relative_path = str(target.relative_to(paths.workspace_root)) except ValueError: - relative_path = str(target.relative_to(worker_dir)) + relative_path = str(target.relative_to(paths.worker_dir)) return { "ok": True, @@ -203,13 +204,14 @@ async def drive_upload_from_workspace( if not path: return {"ok": False, "error": "path is required."} - workspace_root, worker_dir, allowed_paths = _get_paths(ctx) + paths = _get_paths(ctx) try: source = _resolve_tool_path( path, - workspace_root=workspace_root, - worker_dir=worker_dir, - allowed_paths=allowed_paths, + workspace_root=paths.workspace_root, + worker_dir=paths.worker_dir, + allowed_paths=paths.allowed_paths, + restrict_to_allowed_paths=paths.restrict_to_allowed_paths, must_exist=True, ) except Exception as exc: @@ -257,13 +259,14 @@ async def drive_update_from_workspace( if not file_id: return {"ok": False, "error": "file_id is required."} - workspace_root, worker_dir, allowed_paths = _get_paths(ctx) + paths = _get_paths(ctx) try: source = _resolve_tool_path( path, - workspace_root=workspace_root, - worker_dir=worker_dir, - allowed_paths=allowed_paths, + workspace_root=paths.workspace_root, + worker_dir=paths.worker_dir, + allowed_paths=paths.allowed_paths, + restrict_to_allowed_paths=paths.restrict_to_allowed_paths, must_exist=True, ) except Exception as exc: diff --git a/src/octopal/tools/filesystem/files.py b/src/octopal/tools/filesystem/files.py index e19436a5..705d97b5 100644 --- a/src/octopal/tools/filesystem/files.py +++ b/src/octopal/tools/filesystem/files.py @@ -1,22 +1,41 @@ from __future__ import annotations import shutil +from dataclasses import dataclass from pathlib import Path, PurePosixPath from typing import Any from octopal.tools.filesystem.path_safety import WorkspacePathError, resolve_workspace_path -def _get_paths(ctx: dict[str, Any] | Path) -> tuple[Path, Path, list[str] | None]: +@dataclass(frozen=True) +class _FilesystemPaths: + workspace_root: Path + worker_dir: Path + allowed_paths: list[str] | None + restrict_to_allowed_paths: bool = False + + +def _get_paths(ctx: dict[str, Any] | Path) -> _FilesystemPaths: if isinstance(ctx, Path): - return ctx, ctx, None + return _FilesystemPaths(ctx, ctx, None) worker_dir = Path(ctx["base_dir"]) workspace_root = Path(ctx.get("workspace_root") or worker_dir) worker = ctx.get("worker") - allowed_paths = getattr(worker.spec, "allowed_paths", None) if worker and hasattr(worker, "spec") else None - - return workspace_root, worker_dir, list(allowed_paths) if allowed_paths is not None else None + allowed_paths = ( + getattr(worker.spec, "allowed_paths", None) if worker and hasattr(worker, "spec") else None + ) + explicit_allowed_paths = ctx.get("allowed_paths") + if explicit_allowed_paths is not None: + allowed_paths = explicit_allowed_paths + + return _FilesystemPaths( + workspace_root=workspace_root, + worker_dir=worker_dir, + allowed_paths=list(allowed_paths) if allowed_paths is not None else None, + restrict_to_allowed_paths=bool(ctx.get("restrict_to_allowed_paths")), + ) def _normalized_parts(raw_path: str) -> tuple[str, ...]: @@ -61,10 +80,24 @@ def _resolve_tool_path( allowed_paths: list[str] | None, must_exist: bool = False, allow_final_symlink: bool = False, + restrict_to_allowed_paths: bool = False, ) -> Path: + if restrict_to_allowed_paths: + if allowed_paths is None: + raise WorkspacePathError("access denied: no allowed paths configured") + return resolve_workspace_path( + workspace_root, + raw_path, + must_exist=must_exist, + allow_final_symlink=allow_final_symlink, + allowed_paths=allowed_paths, + ) + target_root = worker_dir target_allowlist = None - if _is_shared_workspace_path(raw_path, workspace_root=workspace_root, allowed_paths=allowed_paths): + if _is_shared_workspace_path( + raw_path, workspace_root=workspace_root, allowed_paths=allowed_paths + ): target_root = workspace_root target_allowlist = allowed_paths return resolve_workspace_path( @@ -78,13 +111,14 @@ def _resolve_tool_path( def fs_read(args: dict[str, Any], ctx: dict[str, Any]) -> str: path = str(args.get("path", "")).strip() - workspace_root, worker_dir, allowed_paths = _get_paths(ctx) + paths = _get_paths(ctx) try: target = _resolve_tool_path( path, - workspace_root=workspace_root, - worker_dir=worker_dir, - allowed_paths=allowed_paths, + workspace_root=paths.workspace_root, + worker_dir=paths.worker_dir, + allowed_paths=paths.allowed_paths, + restrict_to_allowed_paths=paths.restrict_to_allowed_paths, must_exist=True, ) return target.read_text(encoding="utf-8") @@ -97,13 +131,14 @@ def fs_read(args: dict[str, Any], ctx: dict[str, Any]) -> str: def fs_write(args: dict[str, Any], ctx: dict[str, Any]) -> str: path = str(args.get("path", "")).strip() content = str(args.get("content", "")) - workspace_root, worker_dir, allowed_paths = _get_paths(ctx) + paths = _get_paths(ctx) try: target = _resolve_tool_path( path, - workspace_root=workspace_root, - worker_dir=worker_dir, - allowed_paths=allowed_paths, + workspace_root=paths.workspace_root, + worker_dir=paths.worker_dir, + allowed_paths=paths.allowed_paths, + restrict_to_allowed_paths=paths.restrict_to_allowed_paths, ) target.parent.mkdir(parents=True, exist_ok=True) target.write_text(content, encoding="utf-8") @@ -116,13 +151,14 @@ def fs_write(args: dict[str, Any], ctx: dict[str, Any]) -> str: def fs_list(args: dict[str, Any], ctx: dict[str, Any]) -> str: path = str(args.get("path", "")).strip() or "." - workspace_root, worker_dir, allowed_paths = _get_paths(ctx) + paths = _get_paths(ctx) try: target = _resolve_tool_path( path, - workspace_root=workspace_root, - worker_dir=worker_dir, - allowed_paths=allowed_paths, + workspace_root=paths.workspace_root, + worker_dir=paths.worker_dir, + allowed_paths=paths.allowed_paths, + restrict_to_allowed_paths=paths.restrict_to_allowed_paths, must_exist=True, ) if not target.is_dir(): @@ -138,7 +174,7 @@ def fs_list(args: dict[str, Any], ctx: dict[str, Any]) -> str: def fs_move(args: dict[str, Any], ctx: dict[str, Any]) -> str: source = str(args.get("source", "")).strip() destination = str(args.get("destination", "")).strip() - workspace_root, worker_dir, allowed_paths = _get_paths(ctx) + paths = _get_paths(ctx) if not source: return "fs_move error: source is required." if not destination: @@ -146,16 +182,18 @@ def fs_move(args: dict[str, Any], ctx: dict[str, Any]) -> str: try: src = _resolve_tool_path( source, - workspace_root=workspace_root, - worker_dir=worker_dir, - allowed_paths=allowed_paths, + workspace_root=paths.workspace_root, + worker_dir=paths.worker_dir, + allowed_paths=paths.allowed_paths, + restrict_to_allowed_paths=paths.restrict_to_allowed_paths, must_exist=True, ) dst = _resolve_tool_path( destination, - workspace_root=workspace_root, - worker_dir=worker_dir, - allowed_paths=allowed_paths, + workspace_root=paths.workspace_root, + worker_dir=paths.worker_dir, + allowed_paths=paths.allowed_paths, + restrict_to_allowed_paths=paths.restrict_to_allowed_paths, ) dst.parent.mkdir(parents=True, exist_ok=True) shutil.move(str(src), str(dst)) @@ -168,13 +206,14 @@ def fs_move(args: dict[str, Any], ctx: dict[str, Any]) -> str: def fs_delete(args: dict[str, Any], ctx: dict[str, Any]) -> str: path = str(args.get("path", "")).strip() - workspace_root, worker_dir, allowed_paths = _get_paths(ctx) + paths = _get_paths(ctx) try: target = _resolve_tool_path( path, - workspace_root=workspace_root, - worker_dir=worker_dir, - allowed_paths=allowed_paths, + workspace_root=paths.workspace_root, + worker_dir=paths.worker_dir, + allowed_paths=paths.allowed_paths, + restrict_to_allowed_paths=paths.restrict_to_allowed_paths, must_exist=True, allow_final_symlink=True, ) diff --git a/tests/test_filesystem_hardening.py b/tests/test_filesystem_hardening.py index 8d545965..3b47eb69 100644 --- a/tests/test_filesystem_hardening.py +++ b/tests/test_filesystem_hardening.py @@ -80,6 +80,27 @@ def test_download_file_rejects_filename_with_directories(tmp_path: Path) -> None assert "filename must not contain directory components" in payload +def test_fs_write_restricted_context_allows_only_configured_workspace_paths(tmp_path: Path) -> None: + workspace = tmp_path / "workspace" + (workspace / "reports").mkdir(parents=True) + workspace.mkdir(parents=True, exist_ok=True) + + ctx = { + "base_dir": workspace, + "workspace_root": workspace, + "allowed_paths": ["reports", "artifacts"], + "restrict_to_allowed_paths": True, + } + + assert fs_write({"path": "reports/out.md", "content": "ok"}, ctx) == "fs_write ok" + assert (workspace / "reports" / "out.md").read_text(encoding="utf-8") == "ok" + + result = fs_write({"path": "mcp_servers.json", "content": "pwn"}, ctx) + assert result.startswith("fs_write error:") + assert "outside allowed paths" in result + assert not (workspace / "mcp_servers.json").exists() + + def test_fs_tools_keep_worker_scratch_and_allow_explicit_shared_paths(tmp_path: Path) -> None: workspace = tmp_path / "workspace" worker_dir = workspace / "workers" / "worker-1" diff --git a/tests/test_router_tool_budget.py b/tests/test_router_tool_budget.py index 5a2721ed..46a29342 100644 --- a/tests/test_router_tool_budget.py +++ b/tests/test_router_tool_budget.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +from pathlib import Path from types import SimpleNamespace from octopal.infrastructure.config.models import A2AConfig, A2APeerConfig @@ -374,15 +375,9 @@ class DummyOcto: assert internal_maintenance_ctx["mcp_refresh_attempted"] is False assert "mcp_agentmail_list_inboxes" not in {tool.name for tool in heartbeat_tools} assert "mcp_agentmail_list_inboxes" not in {tool.name for tool in scheduler_tools} - assert "mcp_agentmail_list_inboxes" not in { - tool.name for tool in scheduled_octo_control_tools - } - assert "mcp_agentmail_list_inboxes" not in { - tool.name for tool in internal_maintenance_tools - } - assert {"list_workers", "list_active_workers"}.issubset( - {tool.name for tool in scheduler_tools} - ) + assert "mcp_agentmail_list_inboxes" not in {tool.name for tool in scheduled_octo_control_tools} + assert "mcp_agentmail_list_inboxes" not in {tool.name for tool in internal_maintenance_tools} + assert {"list_workers", "list_active_workers"}.issubset({tool.name for tool in scheduler_tools}) assert { "list_workers", "list_active_workers", @@ -488,6 +483,31 @@ class DummyOcto: assert {tool.name for tool in tools} == {"manage_canon", "get_worker_output_path", "fs_write"} +def test_worker_followup_fs_write_context_is_limited_to_durable_artifacts( + monkeypatch, tmp_path: Path +) -> None: + monkeypatch.setenv("OCTOPAL_WORKSPACE_DIR", str(tmp_path)) + + class DummyOcto: + mcp_manager = None + + tools, ctx = _get_worker_followup_tools(DummyOcto(), 123) + fs_write_tool = next(tool for tool in tools if tool.name == "fs_write") + + assert ctx["base_dir"] == tmp_path.resolve() + assert ctx["workspace_root"] == tmp_path.resolve() + assert ctx["allowed_paths"] == ["reports", "artifacts"] + assert ctx["restrict_to_allowed_paths"] is True + + assert fs_write_tool.handler({"path": "reports/out.md", "content": "ok"}, ctx) == "fs_write ok" + assert (tmp_path / "reports" / "out.md").read_text(encoding="utf-8") == "ok" + + blocked = fs_write_tool.handler({"path": "mcp_servers.json", "content": "pwn"}, ctx) + assert blocked.startswith("fs_write error:") + assert "outside allowed paths" in blocked + assert not (tmp_path / "mcp_servers.json").exists() + + def test_worker_followup_route_skips_planner_and_uses_narrow_tools(monkeypatch) -> None: import octopal.runtime.octo.router as router @@ -1067,7 +1087,9 @@ async def scenario() -> None: asyncio.run(scenario()) -def test_image_fallback_without_saved_paths_uses_channel_neutral_directory(monkeypatch, tmp_path) -> None: +def test_image_fallback_without_saved_paths_uses_channel_neutral_directory( + monkeypatch, tmp_path +) -> None: monkeypatch.setenv("OCTOPAL_WORKSPACE_DIR", str(tmp_path)) saved_paths = _decode_and_save_images(["data:image/jpeg;base64,SGVsbG8="])