From cdc7d61d0bada85e0b6a1480e087d482d60ab429 Mon Sep 17 00:00:00 2001 From: Mule Date: Sat, 28 Mar 2026 15:28:34 +0000 Subject: [PATCH 1/3] Add screenshot command for capturing visible tab viewport Implements the `browser-cli screenshot ` command with `--output`, `--full-page`, and `--quality` options. The extension uses `chrome.tabs.captureVisibleTab()` to capture the viewport and returns base64-encoded image data. The CLI decodes and writes the image to disk. Full-page capture is accepted but prints a warning as it is not yet implemented. Closes #4 --- Cargo.toml | 1 + extension/manifest.json | 2 +- extension/src/background/service-worker.ts | 39 ++++++++++++ src/cli/commands.rs | 69 ++++++++++++++++++++++ src/main.rs | 24 ++++++++ src/protocol/messages.rs | 1 + 6 files changed, 135 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a90acb5..084872a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ toml = "0.9" anyhow = "1.0" uuid = { version = "1.0", features = ["v4"] } url = "2.5" +base64 = "0.22" [target.'cfg(windows)'.dependencies] winreg = "0.52" diff --git a/extension/manifest.json b/extension/manifest.json index f3b2f5b..1d436f3 100644 --- a/extension/manifest.json +++ b/extension/manifest.json @@ -3,7 +3,7 @@ "name": "Browser CLI", "version": "0.2.1", "description": "Bridges the browser to browser-cli: manages sessions, routes commands to content scripts, and streams structured page snapshots over Native Messaging.", - "permissions": ["nativeMessaging", "tabs", "scripting", "storage", "webNavigation"], + "permissions": ["nativeMessaging", "tabs", "scripting", "storage", "webNavigation", "activeTab"], "background": { "scripts": ["dist/background/service-worker.js"], "service_worker": "dist/background/service-worker.js" diff --git a/extension/src/background/service-worker.ts b/extension/src/background/service-worker.ts index 3401856..5d8687c 100644 --- a/extension/src/background/service-worker.ts +++ 
b/extension/src/background/service-worker.ts @@ -85,6 +85,8 @@ async function handleRequest(req: Request): Promise { case 'type': case 'wait': return await forwardToContent(req); + case 'screenshot': + return await handleScreenshot(req); default: return { id: req.id, ok: false, error: `Unknown action: ${req.action}` }; } @@ -295,6 +297,43 @@ async function forwardToContent(req: Request): Promise { }; } +async function handleScreenshot(req: Request): Promise { + const session = sessionFromRequest(req); + if (!session.ok) { + return { id: req.id, ok: false, error: session.error }; + } + + await ensureTabLoaded(session.value.tab_id); + + const quality = typeof req.params.quality === 'number' ? req.params.quality : undefined; + const format: 'png' | 'jpeg' = quality !== undefined ? 'jpeg' : 'png'; + const options: chrome.tabs.CaptureVisibleTabOptions = { format }; + if (quality !== undefined) { + options.quality = quality; + } + + // Get the window ID for the session's tab + const tab = await chrome.tabs.get(session.value.tab_id); + if (!tab.windowId) { + return { id: req.id, ok: false, error: 'Could not determine window for tab' }; + } + + // Ensure the tab is active in its window before capturing + await chrome.tabs.update(session.value.tab_id, { active: true }); + + const dataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, options); + const base64Data = dataUrl.split(',')[1]; + + return { + id: req.id, + ok: true, + data: { + image: base64Data, + format: format, + }, + }; +} + function sessionFromRequest( req: Request, ): { ok: true; value: Session } | { ok: false; error: string } { diff --git a/src/cli/commands.rs b/src/cli/commands.rs index 45e7688..43c74fa 100644 --- a/src/cli/commands.rs +++ b/src/cli/commands.rs @@ -920,6 +920,75 @@ pub fn plugin_list(json_mode: bool) -> Result<()> { Ok(()) } +pub async fn screenshot( + session_id: &str, + output: Option<&str>, + full_page: bool, + quality: Option, + json_mode: bool, +) -> Result<()> { + use 
base64::Engine as _; + use std::time::{SystemTime, UNIX_EPOCH}; + + if full_page { + eprintln!("Warning: --full-page is not yet supported; capturing viewport only."); + } + + let mut params = json!({ "session_id": session_id, "full_page": false }); + if let Some(q) = quality { + params["quality"] = json!(q); + } + + let data = send_ok(Request::new(actions::SCREENSHOT, params)).await?; + + let image_b64 = data + .get("image") + .and_then(|v| v.as_str()) + .ok_or_else(|| anyhow::anyhow!("missing image data in response"))?; + let format = data + .get("format") + .and_then(|v| v.as_str()) + .unwrap_or("png"); + + let image_bytes = base64::engine::general_purpose::STANDARD + .decode(image_b64) + .map_err(|e| anyhow::anyhow!("failed to decode base64 image: {e}"))?; + + let extension = if format == "jpeg" { "jpg" } else { "png" }; + let output_path = match output { + Some(p) => PathBuf::from(p), + None => { + let ts = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap_or_default() + .as_millis(); + PathBuf::from(format!("screenshot-{ts}.{extension}")) + } + }; + + fs::write(&output_path, &image_bytes)?; + + let size_bytes = image_bytes.len(); + + if json_mode { + print_json(&json!({ + "session_id": session_id, + "path": output_path.display().to_string(), + "format": format, + "size_bytes": size_bytes, + }))?; + } else { + println!( + "Screenshot saved: {} ({} bytes, {})", + output_path.display(), + size_bytes, + format + ); + } + + Ok(()) +} + async fn fetch_snapshot( session_id: &str, action: &str, diff --git a/src/main.rs b/src/main.rs index 65ca574..4cd8a84 100644 --- a/src/main.rs +++ b/src/main.rs @@ -276,6 +276,23 @@ enum Command { #[arg(long, short = 'v')] verbose: bool, }, + /// Capture screenshot of the current page + Screenshot { + /// Session ID + session_id: String, + /// Output file path (default: screenshot-.png) + #[arg(short, long)] + output: Option, + /// Capture full page instead of just the viewport + #[arg(long)] + full_page: bool, + /// Image 
quality for JPEG (0-100, default: PNG format) + #[arg(long)] + quality: Option<u32>, + /// Output as JSON + #[arg(long)] + json: bool, + }, /// Manage and run plugins Plugin { #[command(subcommand)] @@ -465,6 +482,13 @@ async fn main() -> anyhow::Result<()> { json, verbose, } => cli::commands::view(session_id, target, page, fresh, json, verbose).await?, + Command::Screenshot { + ref session_id, + ref output, + full_page, + quality, + json, + } => cli::commands::screenshot(session_id, output.as_deref(), full_page, quality, json).await?, Command::Plugin { ref cmd } => match cmd { PluginCommand::Run { name, diff --git a/src/protocol/messages.rs b/src/protocol/messages.rs index 24eaf2c..753e131 100644 --- a/src/protocol/messages.rs +++ b/src/protocol/messages.rs @@ -152,6 +152,7 @@ pub mod actions { pub const TYPE: &str = "type"; pub const WAIT: &str = "wait"; pub const GET_TEXT: &str = "get_text"; + pub const SCREENSHOT: &str = "screenshot"; } pub const PAGE_CHUNK_TYPE: &str = "page_chunk"; From 984dfd5daa0f34e01d9a94ac1843379e49e8dc3e Mon Sep 17 00:00:00 2001 From: Mule Date: Sat, 28 Mar 2026 15:43:51 +0000 Subject: [PATCH 2/3] Fix TS2724: replace non-existent CaptureVisibleTabOptions with inferred parameter type Use `Parameters<typeof chrome.tabs.captureVisibleTab>[1]` instead of the non-existent `chrome.tabs.CaptureVisibleTabOptions` type. --- extension/src/background/service-worker.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extension/src/background/service-worker.ts b/extension/src/background/service-worker.ts index 5d8687c..f6aa756 100644 --- a/extension/src/background/service-worker.ts +++ b/extension/src/background/service-worker.ts @@ -307,7 +307,7 @@ async function handleScreenshot(req: Request): Promise { const quality = typeof req.params.quality === 'number' ? req.params.quality : undefined; const format: 'png' | 'jpeg' = quality !== undefined ? 
'jpeg' : 'png'; - const options: chrome.tabs.CaptureVisibleTabOptions = { format }; + const options: Parameters<typeof chrome.tabs.captureVisibleTab>[1] = { format }; if (quality !== undefined) { options.quality = quality; } From 8e9ed9324e2ab846431ee9b3cb4898c8be327db2 Mon Sep 17 00:00:00 2001 From: Mule Date: Sat, 28 Mar 2026 16:08:35 +0000 Subject: [PATCH 3/3] Address review: restore active tab after capture and validate quality range --- extension/src/background/service-worker.ts | 8 ++++++++ src/main.rs | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/extension/src/background/service-worker.ts b/extension/src/background/service-worker.ts index f6aa756..f1f8d73 100644 --- a/extension/src/background/service-worker.ts +++ b/extension/src/background/service-worker.ts @@ -318,12 +318,20 @@ async function handleScreenshot(req: Request): Promise { return { id: req.id, ok: false, error: 'Could not determine window for tab' }; } + // Record the currently active tab so we can restore focus after capture + const [previousTab] = await chrome.tabs.query({ active: true, windowId: tab.windowId }); + // Ensure the tab is active in its window before capturing await chrome.tabs.update(session.value.tab_id, { active: true }); const dataUrl = await chrome.tabs.captureVisibleTab(tab.windowId, options); const base64Data = dataUrl.split(',')[1]; + // Restore previously active tab if it was different + if (previousTab && previousTab.id !== undefined && previousTab.id !== tab.id) { + await chrome.tabs.update(previousTab.id, { active: true }); + } + return { id: req.id, ok: true, diff --git a/src/main.rs b/src/main.rs index 4cd8a84..3d114f4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -287,7 +287,7 @@ enum Command { #[arg(long)] full_page: bool, /// Image quality for JPEG (0-100, default: PNG format) - #[arg(long)] + #[arg(long, value_parser = clap::value_parser!(u32).range(0..=100))] quality: Option<u32>, /// Output as JSON #[arg(long)]