diff --git a/.gitignore b/.gitignore index 0c395504..7a60a7fa 100644 --- a/.gitignore +++ b/.gitignore @@ -346,3 +346,6 @@ FodyWeavers.xsd Output/ *.lscache test_ws.py + +# Local visual test output +visual-test-output/ diff --git a/AGENTS.md b/AGENTS.md index 07e3ac67..ff78ede9 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -22,4 +22,5 @@ If a command fails: Notes: - If a build/test is blocked by an environmental lock (for example running executable locking output assemblies), stop/close the locking process and rerun. +- Tray tests must isolate `SettingsManager` from real user settings. Do not use `new SettingsManager()` in tests unless the test intentionally reads `%APPDATA%\OpenClawTray\settings.json`; pass a temp settings directory or set `OPENCLAW_TRAY_DATA_DIR` before the test process starts. - Do not claim completion without reporting validation results. diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 044b8e52..01774e61 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -87,7 +87,7 @@ OpenClaw.Tray.Tests ──tests──▶ OpenClaw.Shared |-----------|----------|---------| | **Gateway Communication** | `OpenClaw.Shared/OpenClawGatewayClient.cs` | WebSocket client with protocol v3, reconnect/backoff logic | | **Notification System** | `OpenClaw.Tray.WinUI/App.xaml.cs` | Event routing, toast notifications, classification | -| **WebView2 Integration** | `OpenClaw.Tray.WinUI/Windows/WebChatWindow.xaml.cs` | Embedded chat panel with lifecycle management | +| **WebView2 Integration** | `OpenClaw.Tray.WinUI/Windows/ChatWindow.xaml.cs` | Embedded chat panel with lifecycle management | | **Tray Icon Management** | `OpenClaw.Tray.WinUI/Helpers/IconHelper.cs` | GDI handle management, dynamic icon generation | | **Session Tracking** | `OpenClaw.Shared/OpenClawGatewayClient.cs` | Session state, activity tracking, polling | | **Settings & Logging** | `OpenClaw.Tray.WinUI/Services/` | JSON settings persistence, file rotation logging | @@ -285,7 +285,7 @@ Notifications are classified 
using two strategies: ### WebView2 Lifecycle -The `WebChatWindow` uses Microsoft Edge WebView2 for embedded web content: +The `ChatWindow` uses Microsoft Edge WebView2 for embedded web content: **Initialization:** 1. WebView2 control created in XAML @@ -299,7 +299,7 @@ Window Created → WebView2.EnsureCoreWebView2Async() → Navigate to Chat URL ``` **Key Design Decisions:** -- **Singleton pattern**: Only one WebChat window instance exists +- **Singleton pattern**: Only one chat window instance exists - **Hidden instead of disposed**: Window is hidden when closed to preserve state - **Separate user data folder**: Isolates cookies/storage from browser - **Navigation guard**: Prevents accidental navigation away from chat @@ -425,8 +425,8 @@ dotnet test --filter "FullyQualifiedName~AgentActivityTests" ``` **Test Coverage:** -- ✅ **478 tests** in `OpenClaw.Shared.Tests` — models, gateway client, exec approvals, capabilities, URL helpers, notification categorization, shell quoting -- ✅ **93 tests** in `OpenClaw.Tray.Tests` — menu display, menu positioning, settings round-trip, deep link parsing +- ✅ **1182 tests** in `OpenClaw.Shared.Tests` — models, gateway client, exec approvals, capabilities, URL helpers, notification categorization, shell quoting, MCP, device identity, and WinNode client coverage +- ✅ **388 tests** in `OpenClaw.Tray.Tests` — settings round-trip, deep link parsing, onboarding state, setup code decoder, gateway health/chat helpers, security validation, wizard step parsing, gateway discovery, localization validation - ✅ All tests are pure unit tests (no network, no file system, no external dependencies) See [tests/OpenClaw.Shared.Tests/README.md](tests/OpenClaw.Shared.Tests/README.md) for detailed test documentation. @@ -441,7 +441,7 @@ You can test the UI and basic functionality without a running gateway: 3. Enter a dummy gateway URL (e.g., `ws://localhost:18789`) 4. 
The app will show "Disconnected" status but you can: - Test the tray menu structure - - Open Settings dialog and configure preferences + - Open the Settings page and configure preferences - Test auto-start functionality - View logs @@ -487,8 +487,8 @@ You can test the UI and basic functionality without a running gateway: - Verify Windows toast notification appears (if enabled) - Click toast → should open relevant UI -2. **Notification History**: - - Right-click tray → **Notification History** +2. **Activity / notification history**: + - Right-click tray → **Activity Stream** or **Notification History** - Verify past notifications are listed - Test filtering by category @@ -747,6 +747,51 @@ gh run download --repo shanselman/openclaw-windows-hub - **Discussions**: [GitHub Discussions](https://github.com/shanselman/openclaw-windows-hub/discussions) - **Documentation**: [OpenClaw Docs](https://docs.molt.bot) +## Developing & Testing the Onboarding Wizard + +The onboarding wizard is a 6-screen flow built with OpenClaw's minimal FunctionalUI helper layer for declarative C# WinUI. The chat page uses a WebView2 overlay for visual consistency with the post-setup chat experience. + +### Building + +The WinUI project requires platform-specific build targets. Use the build script: + +```bash +./build.ps1 -Project WinUI # Builds with correct -r win-x64 targets +``` + +Direct `dotnet build` without the script will fail with "WindowsAppSDKSelfContained requires a supported Windows architecture". 
+ +### Environment Variables + +| Variable | Purpose | +|----------|---------| +| `OPENCLAW_FORCE_ONBOARDING=1` | Show onboarding wizard even if a token already exists | +| `OPENCLAW_SKIP_UPDATE_CHECK=1` | Skip the update dialog (useful during testing) | +| `OPENCLAW_LANGUAGE=fr-fr` | Override UI language (validated: en-us, fr-fr, nl-nl, zh-cn, zh-tw) | +| `OPENCLAW_GATEWAY_PORT=19001` | Override default gateway port for local dev | +| `OPENCLAW_VISUAL_TEST=1` | Enable automatic screenshot capture on page transitions | +| `OPENCLAW_VISUAL_TEST_DIR=path` | Output directory for visual test screenshots | + +### Testing the Wizard Locally + +1. Start a local gateway (e.g., in WSL): `cd ~/openclaw && npx openclaw gateway` +2. Set env vars: + ```powershell + $env:OPENCLAW_FORCE_ONBOARDING = "1" + $env:OPENCLAW_SKIP_UPDATE_CHECK = "1" + ``` +3. Build and run: `./build.ps1 -Project WinUI` then launch the exe +4. Navigate through all 6 screens to verify + +### Architecture + +- **FunctionalUI**: `src/OpenClawTray.FunctionalUI/` — Minimal declarative WinUI helper layer used by onboarding +- **Pages**: `src/OpenClaw.Tray.WinUI/Onboarding/Pages/` — Functional UI components for each wizard screen +- **Services**: `src/OpenClaw.Tray.WinUI/Onboarding/Services/` — State management, setup code decoder, permission checker, health check, input validation +- **Widgets**: `src/OpenClaw.Tray.WinUI/Onboarding/Widgets/` — Shared UI components (cards, step indicators, feature rows) +- **Window**: `src/OpenClaw.Tray.WinUI/Onboarding/OnboardingWindow.cs` — Host window with WebView2 overlay for chat +- **Helpers**: `src/OpenClaw.Tray.WinUI/Helpers/GatewayChatHelper.cs` — Shared WebView2 chat URL builder + --- *Made with 🦞 love by Scott Hanselman and the OpenClaw community* diff --git a/README.md b/README.md index 26815f37..ea677b44 100644 --- a/README.md +++ b/README.md @@ -98,14 +98,14 @@ Modern Windows 11-style system tray companion that connects to your local OpenCl - 🌐 **Web Chat** - 
Embedded chat window with WebView2 - 📊 **Live Status** - Real-time sessions, channels, and usage display - 🧭 **Command Center** - Dense gateway, channel, usage, node, pairing, and allowlist diagnostics from one window -- ⚡ **Activity Stream** - Dedicated flyout for live session, usage, node, and notification events +- ⚡ **Activity Stream** - Command Center page for live session, usage, node, and notification events - 🔔 **Toast Notifications** - Clickable Windows notifications with [smart categorization](docs/NOTIFICATION_CATEGORIZATION.md) - 📡 **Channel Control** - Start/stop Telegram & WhatsApp from the menu - 🖥️ **Node Observability** - Node inventory with online/offline state and copyable summary - ⏱ **Cron Jobs** - Quick access to scheduled tasks - 🚀 **Auto-start** - Launch with Windows -- ⚙️ **Settings** - Full configuration dialog -- 🎯 **First-run experience** - Welcome dialog guides new users +- ⚙️ **Settings** - Full configuration page +- 🎯 **First-run onboarding** — 6-screen setup wizard (connection, permissions, chat, configuration) #### Quick Send scope requirement @@ -123,7 +123,7 @@ If Quick Send fails with `pairing required` / `NOT_PAIRED`, that is a **device a ### Menu Sections - **Status** - Gateway connection status with click-to-view details -- **Command Center** - Status detail window with diagnostics, channel health, usage, sessions, nodes, and copyable repair commands +- **Command Center** - Hub with diagnostics, channel health, usage, sessions, nodes, and copyable repair commands - **Sessions** - Active agent sessions with preview and per-session controls - **Usage** - Provider/cost summary with quick jump to activity details - **Channels** - Telegram/WhatsApp status with toggle control @@ -164,7 +164,7 @@ These features are available in Windows but not in the Mac app: | Channel control | Start/stop Telegram & WhatsApp | | Modern flyout menu | Windows 11-style with dark/light mode | | Deep links | `openclaw://` URL scheme with IPC | -| 
First-run welcome | Guided onboarding for new users | +| First-run onboarding | 6-screen guided setup wizard (Welcome → Connection → Wizard → Permissions → Chat → Ready) | | PowerToys integration | Command Palette extension | ### 🔌 Node Mode (Agent Control) @@ -179,6 +179,9 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t | **Camera** | `camera.list`, `camera.snap`, `camera.clip` | Enumerate cameras and capture still photos or short video clips | | **Location** | `location.get` | Return Windows geolocation when permission is available | | **Device** | `device.info`, `device.status` | Return Windows host/app metadata and lightweight status | +| **Text-to-speech** | `tts.speak` | Speak text aloud through Windows speech synthesis, or ElevenLabs when configured | + +Packaged installs declare camera, microphone, and location capabilities. Windows may ask for consent the first time a node capability uses one of those protected resources. #### Node Setup @@ -205,23 +208,24 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t "canvas.hide", "canvas.navigate", "canvas.eval", - "canvas.snapshot", - "canvas.a2ui.push", - "canvas.a2ui.pushJSONL", - "canvas.a2ui.reset", - "screen.snapshot", - "camera.list", - "camera.snap", - "camera.clip", - "location.get", - "device.info", - "device.status" + "canvas.snapshot", + "canvas.a2ui.push", + "canvas.a2ui.pushJSONL", + "canvas.a2ui.reset", + "screen.snapshot", + "camera.list", + "camera.snap", + "camera.clip", + "location.get", + "device.info", + "device.status", + "tts.speak" ] - } - } + } + } } ``` - > ⚠️ **Important**: The gateway has a server-side allowlist. Commands must be listed explicitly - wildcards like `canvas.*` don't work! Privacy-sensitive commands such as `screen.record` should only be added to `allowCommands` when you explicitly want to allow them. + > ⚠️ **Important**: The gateway has a server-side allowlist. 
Commands must be listed explicitly - wildcards like `canvas.*` don't work! Privacy-sensitive commands such as `screen.record` and agent-driven audio playback via `tts.speak` should only be added to `allowCommands` when you explicitly want to allow them. 5. **Test it** from your Mac/gateway: ```bash @@ -249,6 +253,9 @@ When Node Mode is enabled in Settings, your Windows PC becomes a **node** that t # Take a photo (NV12/MediaCapture fallback) openclaw nodes invoke --node --command camera.snap --params '{"deviceId":"","format":"jpeg","quality":80}' + # Speak text aloud on the Windows node (requires TTS enabled in Settings and tts.speak allowed on the gateway) + openclaw nodes invoke --node --command tts.speak --params '{"text":"Hello from OpenClaw","provider":"windows"}' + # Execute a command on the Windows node openclaw nodes invoke --node --command system.run --params '{"command":"Get-Process | Select -First 5","shell":"powershell","timeoutMs":10000}' @@ -294,12 +301,12 @@ OpenClaw registers the `openclaw://` URL scheme for automation and integration: | Link | Description | |------|-------------| -| `openclaw://settings` | Open Settings dialog | +| `openclaw://settings` | Open the Settings page | | `openclaw://setup` | Open Setup Wizard | -| `openclaw://chat` | Open Web Chat window | +| `openclaw://chat` | Open the Chat page | | `openclaw://commandcenter` | Open Command Center diagnostics | -| `openclaw://activity` | Open Activity Stream | -| `openclaw://history` | Open Notification History | +| `openclaw://activity` | Open the Activity page | +| `openclaw://history` | Open the Activity page filtered to notification history | | `openclaw://dashboard` | Open Dashboard in browser | | `openclaw://dashboard/sessions` | Open specific dashboard page | | `openclaw://dashboard/channels` | Open Channels dashboard page | @@ -336,15 +343,15 @@ PowerToys Command Palette extension for quick OpenClaw access. 
- **📡 Dashboard: Channels** - Open the channel configuration dashboard - **🧩 Dashboard: Skills** - Open the skills dashboard - **⏱️ Dashboard: Cron** - Open the scheduled jobs dashboard -- **💬 Web Chat** - Open the embedded Web Chat window +- **💬 Web Chat** - Open the embedded Chat page - **📝 Quick Send** - Open the Quick Send dialog to compose a message - **🧭 Setup Wizard** - Open pairing/setup - **🧭 Command Center** - Open diagnostics and support actions - **🔄 Run Health Check** - Refresh connection health - **⬇️ Check for Updates** - Run a manual GitHub Releases update check - **⚡ Activity Stream** - Open recent activity -- **📋 Notification History** - Open notification history -- **⚙️ Settings** - Open the OpenClaw Tray Settings dialog +- **📋 Notification History** - Open notification history in the Activity page +- **⚙️ Settings** - Open the OpenClaw Tray Settings page - **📄 Open Log File / 📁 Logs / 🗂️ Config / 🧪 Diagnostics** - Open support files and folders - **📋 Copy Support Context** - Copy redacted Command Center metadata - **🧰 Copy Debug Bundle** - Copy combined support, port, capability, node, channel, and activity diagnostics @@ -402,10 +409,16 @@ Default gateway: `ws://localhost:18789` ### First Run -On first run without a token, Molty displays a welcome dialog that: -1. Explains what's needed to get started -2. Links to [documentation](https://docs.molt.bot/web/dashboard) for token setup -3. Opens Settings to configure the connection +On first run, Molty launches a guided onboarding wizard that walks you through setup: + +1. **Welcome** — introduces OpenClaw and starts the setup flow +2. **Connection** — choose Local gateway, Remote gateway, or configure later. Paste a setup code or enter gateway URL and token manually. Tests the connection with Ed25519 device authentication. +3. **Wizard** — gateway-driven configuration steps (AI provider selection, personality setup, communication channels). Steps are defined by your gateway. +4. 
**Permissions** — reviews Windows system permissions (notifications, camera, microphone, screen capture, location) and links to system settings to grant them. +5. **Chat** — meet your agent in a live chat powered by the gateway's web UI. +6. **Ready** — summary of available features, option to launch at startup, and a Finish button. + +For detailed setup instructions, see [docs/SETUP.md](docs/SETUP.md). For the full onboarding architecture, see [docs/ONBOARDING_WIZARD.md](docs/ONBOARDING_WIZARD.md). ## License diff --git a/build.ps1 b/build.ps1 index 13cb7e86..e7174331 100644 --- a/build.ps1 +++ b/build.ps1 @@ -23,7 +23,7 @@ #> param( - [ValidateSet("All", "Tray", "WinUI", "Shared", "CommandPalette", "Cli")] + [ValidateSet("All", "Tray", "WinUI", "Shared", "CommandPalette", "Cli", "WinNodeCli")] [string]$Project = "All", [ValidateSet("Debug", "Release")] @@ -188,12 +188,13 @@ function Build-Project($name, $path, $useRid = $false) { $projects = @{ "Shared" = @{ Path = "src/OpenClaw.Shared/OpenClaw.Shared.csproj"; UseRid = $false } "Cli" = @{ Path = "src/OpenClaw.Cli/OpenClaw.Cli.csproj"; UseRid = $false } + "WinNodeCli" = @{ Path = "src/OpenClaw.WinNode.Cli/OpenClaw.WinNode.Cli.csproj"; UseRid = $false } "Tray" = @{ Path = "src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj"; UseRid = $true } "WinUI" = @{ Path = "src/OpenClaw.Tray.WinUI/OpenClaw.Tray.WinUI.csproj"; UseRid = $true } "CommandPalette" = @{ Path = "src/OpenClaw.CommandPalette/OpenClaw.CommandPalette.csproj"; UseRid = $false } } -$toBuild = if ($Project -eq "All") { @("Shared", "Cli", "WinUI") } else { @($Project) } +$toBuild = if ($Project -eq "All") { @("Shared", "Cli", "WinNodeCli", "WinUI") } else { @($Project) } # Always build Shared first if building other projects if ($Project -ne "Shared" -and $Project -ne "All" -and $toBuild -notcontains "Shared") { diff --git a/docs/LOCALIZATION.md b/docs/LOCALIZATION.md index 9e077bd8..67f35a25 100644 --- a/docs/LOCALIZATION.md +++ b/docs/LOCALIZATION.md @@ 
-7,7 +7,10 @@ OpenClaw Tray uses WinUI `.resw` resource files for localization. Windows automa | Language | Locale | Resource File | |----------|--------|---------------| | English (US) | `en-us` | `Strings/en-us/Resources.resw` | +| French (France) | `fr-fr` | `Strings/fr-fr/Resources.resw` | +| Dutch (Netherlands) | `nl-nl` | `Strings/nl-nl/Resources.resw` | | Chinese (Simplified) | `zh-cn` | `Strings/zh-cn/Resources.resw` | +| Chinese (Traditional) | `zh-tw` | `Strings/zh-tw/Resources.resw` | ## Adding a New Language @@ -43,7 +46,7 @@ OpenClaw Tray uses WinUI `.resw` resource files for localization. Windows automa 5. **Do not translate resource key names** (the `name` attribute). Only translate `<value>` content. -6. **Submit a pull request** with just your new `Resources.resw` file. No code changes are needed — the build system automatically discovers new locale folders. +6. **Submit a pull request** with just your new `Resources.resw` file. No code changes are needed — the build system and localization tests automatically discover new locale folders. ## How It Works @@ -65,16 +68,16 @@ Windows picks the language automatically based on the user's OS display language ## Testing a Language Locally -To test a specific locale without changing your Windows language: +Set the `OPENCLAW_LANGUAGE` environment variable before launching the app: -1. Open `src/OpenClaw.Tray.WinUI/App.xaml.cs` -2. Add this line at the top of the `App()` constructor, **before** `InitializeComponent()`: - ```csharp - LocalizationHelper.SetLanguageOverride("zh-CN"); - ``` -3. Build and run (`dotnet build src/OpenClaw.Tray.WinUI -r win-x64`). Remove the line when done testing. +```powershell +$env:OPENCLAW_LANGUAGE = "fr-fr" # or nl-nl, zh-cn, zh-tw +.\src\OpenClaw.Tray.WinUI\bin\Debug\net10.0-windows10.0.19041.0\win-x64\OpenClaw.Tray.WinUI.exe +``` -> **Note:** This overrides `LocalizationHelper.GetString()` calls (menus, toasts, dialogs, window titles). 
XAML `x:Uid` bindings follow the OS display language. For full XAML localization testing, change your Windows display language in Settings → Time & Language. +This overrides `LocalizationHelper.GetString()` calls for menus, toasts, dialogs, and the onboarding wizard. The language is validated against the supported locale list. + +> **Note:** XAML `x:Uid` bindings follow the OS display language. For full localization testing including XAML elements, change your Windows display language in Settings → Time & Language. ## Resource Key Naming Conventions @@ -87,12 +90,31 @@ To test a specific locale without changing your Windows language: | `Status_Name` | Status display text | `Status_Connected` | | `TimeAgo_Format` | Relative time strings | `TimeAgo_MinutesFormat` | +### Onboarding Key Namespace + +All onboarding wizard strings use the `Onboarding_` prefix: + +| Pattern | Used For | Example | +|---------|----------|---------| +| `Onboarding_PageName_Label` | Page titles, descriptions | `Onboarding_Welcome_Title` | +| `Onboarding_Connection_*` | Connection page labels/status | `Onboarding_Connection_TestConnection` | +| `Onboarding_Perm_*` | Permission names | `Onboarding_Perm_Camera` | +| `Onboarding_Ready_*` | Ready page elements | `Onboarding_Ready_Feature_Voice_Subtitle` | +| `Onboarding_Wizard_*` | Wizard page elements | `Onboarding_Wizard_Continue` | + ## Validation -Both resource files must have the **same set of keys**. You can verify with: +All resource files must have the **same set of keys**. Locale directories are discovered dynamically under `Strings/`, so adding a new `Strings/<locale>/Resources.resw` file automatically brings it under validation. 
You can verify counts with: ```powershell -$en = (Select-String -Path "src\OpenClaw.Tray.WinUI\Strings\en-us\Resources.resw" -Pattern '\Resources.resw" -Pattern ': $new keys | Match: $($en -eq $new)" +$base = "src\OpenClaw.Tray.WinUI\Strings" +Get-ChildItem $base -Directory | ForEach-Object { + $loc = $_.Name + $count = (Select-String -Path "$base\$loc\Resources.resw" -Pattern '` flow; the Windows tray does not edit gateway pairing state directly. + +### Wizard +Renders server-defined setup steps via RPC (`wizard.start` / `wizard.next`). The gateway controls the flow — steps can be: +- **Note** — informational messages +- **Confirm** — yes/no decisions +- **Text** — free-form input (with PasswordBox for sensitive fields like API keys) +- **Select** — radio button choices (e.g., AI provider selection) +- **Progress** — loading indicator for background operations + +If the gateway doesn't support the wizard protocol or is unreachable, this screen shows an "offline" message and can be skipped. + +### Permissions +Checks 5 Windows permissions using native APIs and registry: +- Notifications (Toast capability) +- Camera (Windows.Devices.Enumeration) +- Microphone (Windows.Devices.Enumeration) +- Screen Capture (Graphics.Capture) +- Location (optional, registry-based) + +Each permission shows its current status (Enabled/Disabled/Allowed/Denied) with an "Open Settings" button linking to the relevant `ms-settings:` URI. + +### Chat +Embeds the gateway's web chat UI via WebView2, matching the post-setup `ChatWindow` for visual consistency. Uses the shared `GatewayChatHelper` for URL building and WebView2 initialization. + +On first load, a bootstrap message is auto-injected to kick off the gateway's first-run ritual (BOOTSTRAP.md). The message is safely encoded using `JsonSerializer.Serialize` to prevent XSS. + +### Ready +Displays 5 feature cards (Tray Menu, Channels, Voice, Canvas, Skills) with localized subtitles. 
Includes a "Launch at Login" toggle and a "Finish" button that saves settings and closes the wizard. + +## Security + +The onboarding wizard follows these security practices: + +- **XSS prevention**: Bootstrap messages encoded via `JsonSerializer.Serialize` for safe JS injection +- **Input validation**: Setup codes limited to 2KB, decoded JSON validated, gateway URLs checked via `GatewayUrlHelper` +- **URI scheme whitelists**: Only `ms-settings:` for permissions, `http/https` for chat +- **Navigation restriction**: WebView2 `NavigationStarting` handler blocks navigation to external origins +- **Token protection**: Query params stripped from all log output; WebView2 accelerator keys disabled +- **Gateway-owned pairing**: Device approval uses the gateway CLI/API path so scope checks, token issuance, audit, and broadcasts stay centralized +- **Error sanitization**: Exception details logged but not shown to users + +## Localization + +All user-visible strings use `LocalizationHelper.GetString()` with the `Onboarding_*` key namespace. Supported languages are discovered from the `Strings/<locale>/Resources.resw` directories; the current locales are English, French, Dutch, Chinese Simplified, and Chinese Traditional. + +Translations are AI-generated following the repo convention. Technical terms (Gateway, Token, Node Mode) are kept in English across all locales. + +## Developer Guide + +See [DEVELOPMENT.md](../DEVELOPMENT.md#developing--testing-the-onboarding-wizard) for build instructions, environment variables, and testing workflow. + +### Test Isolation + +`SettingsManager` loads `%APPDATA%\OpenClawTray\settings.json` by default. Onboarding tests must not use `new SettingsManager()` without an isolated settings directory, because local user settings such as `EnableNodeMode=true` change page ordering by intentionally skipping operator-only Wizard and Chat pages. 
+ +Use a temp settings directory for tests that construct `SettingsManager`, or set `OPENCLAW_TRAY_DATA_DIR` before the test process starts. + +### Key Files + +| Path | Purpose | +|------|---------| +| `Onboarding/OnboardingWindow.cs` | Host window with WebView2 overlay | +| `Onboarding/OnboardingApp.cs` | Functional UI root component, page navigation | +| `Onboarding/Services/OnboardingState.cs` | Shared state across all pages | +| `Onboarding/Pages/*.cs` | Individual wizard screens | +| `Onboarding/Services/SetupCodeDecoder.cs` | Base64url setup code parsing | +| `Onboarding/Services/InputValidator.cs` | Security input validation | +| `Onboarding/Services/WizardStepParser.cs` | Wizard JSON step parsing | +| `Onboarding/Services/LocalGatewayApprover.cs` | Local gateway URL classification | +| `Onboarding/Services/PermissionChecker.cs` | Windows permission checks | +| `Helpers/GatewayChatHelper.cs` | Shared WebView2 chat URL builder | diff --git a/docs/POWERTOYS.md b/docs/POWERTOYS.md index 522fda05..1d294443 100644 --- a/docs/POWERTOYS.md +++ b/docs/POWERTOYS.md @@ -39,15 +39,15 @@ Open Command Palette (`Win+Alt+Space`), type **"OpenClaw"** — you should see t | **📡 Dashboard: Channels** | Opens the channel configuration dashboard | | **🧩 Dashboard: Skills** | Opens the skills dashboard | | **⏱️ Dashboard: Cron** | Opens the scheduled jobs dashboard | -| **💬 Web Chat** | Opens the embedded Web Chat window in OpenClaw Tray | +| **💬 Web Chat** | Opens the embedded Chat page in OpenClaw Tray | | **📝 Quick Send** | Opens the Quick Send dialog to compose a message | | **🧭 Setup Wizard** | Opens QR, setup code, and manual gateway pairing | | **🧭 Command Center** | Opens gateway, tunnel, node, browser, and support diagnostics | | **🔄 Run Health Check** | Refreshes gateway or node connection health | | **⬇️ Check for Updates** | Runs a manual GitHub Releases update check | | **⚡ Activity Stream** | Opens recent tray activity and support bundle actions | -| **📋 
Notification History** | Opens recent OpenClaw tray notifications | -| **⚙️ Settings** | Opens the OpenClaw Tray Settings dialog | +| **📋 Notification History** | Opens recent OpenClaw tray notifications in the Activity page | +| **⚙️ Settings** | Opens the OpenClaw Tray Settings page | | **📄 Open Log File** | Opens the current OpenClaw Tray log | | **📁 Open Logs Folder** | Opens the OpenClaw Tray logs folder | | **🗂️ Open Config Folder** | Opens the OpenClaw Tray configuration folder | diff --git a/docs/SETUP.md b/docs/SETUP.md index e58802de..2ae87713 100644 --- a/docs/SETUP.md +++ b/docs/SETUP.md @@ -43,16 +43,35 @@ After the installer finishes, OpenClaw Tray starts automatically. Look for the If you don't see it, check the **hidden icons** area (the `^` arrow next to the tray). -### 5. Configure the Connection +### 5. Onboarding Wizard -On first launch, a **Welcome** dialog appears. Click **Open Settings** to configure: +On first launch, Molty opens a **6-screen onboarding wizard** that walks you through setup: -| Setting | What to enter | -|---------|--------------| -| **Gateway URL** | `ws://localhost:18789` (if running OpenClaw locally) or your remote gateway address | -| **Token** | Your OpenClaw API token from [openclaw.ai](https://openclaw.ai) | +1. **Welcome** — A friendly greeting introducing OpenClaw and Molty. Click **Get Started** to begin. -Click **Save**. Molty will connect to the gateway and the tray icon will turn green when connected. +2. **Connection** — Choose how to connect to your gateway: + - **Local** — Select this if the gateway runs on the same machine or in WSL. The URL is pre-filled to `ws://localhost:18789`. + - **Remote** — Enter your gateway URL and bootstrap token manually, **or** paste a base64url-encoded **setup code** (a single string containing both URL and token). + - **Later** — Skip connection setup for now. You can configure it later from the tray menu → Settings. + + After entering your details, click **Test Connection**. 
The wizard performs a real WebSocket handshake with Ed25519 device authentication and shows real-time status feedback (connecting → connected → pairing). + +3. **Wizard** — If your gateway supports it, this screen walks you through gateway-driven configuration steps (AI provider selection, personality setup, communication channels). The steps are defined by your gateway via RPC. If the gateway doesn't support wizard mode or is unreachable, this screen shows an "offline" message and can be skipped. + +4. **Permissions** — Reviews Windows system permissions needed for full functionality: + - **Notifications** — for toast alerts + - **Camera** — for camera capture + - **Microphone** — for voice input + - **Screen Capture** — for screenshots + - **Location** — optional, for location-aware features; packaged installs declare this capability so Windows may prompt for location consent the first time it is used + + Each permission shows its current status. Click **Open Settings** next to any permission to jump directly to the relevant Windows Settings page. + +5. **Chat** — Meet your agent! This screen opens a live chat powered by the gateway's web UI. A bootstrap message is sent automatically to kick off your first conversation. + +6. **Ready** — A summary of available features (tray menu, channels, voice, canvas, skills). Toggle **Launch at Login** to start Molty with Windows, then click **Finish** to complete setup. + +After the wizard, the tray icon turns green when connected. You can re-run the wizard or change settings anytime from the tray menu. 
## Tray Icon Status @@ -76,14 +95,14 @@ OpenClaw Tray responds to `openclaw://` deep links, which can be invoked from a | `openclaw://dashboard/channels` | Open the channels dashboard page | | `openclaw://dashboard/skills` | Open the skills dashboard page | | `openclaw://dashboard/cron` | Open the cron dashboard page | -| `openclaw://chat` | Open the embedded Web Chat window | +| `openclaw://chat` | Open the embedded Chat page | | `openclaw://send` | Open the Quick Send dialog | | `openclaw://send?message=Hello` | Open Quick Send with pre-filled text | -| `openclaw://settings` | Open the Settings dialog | +| `openclaw://settings` | Open the Settings page | | `openclaw://setup` | Open the Setup Wizard | | `openclaw://commandcenter` | Open Command Center diagnostics | -| `openclaw://activity` | Open the Activity Stream | -| `openclaw://history` | Open Notification History | +| `openclaw://activity` | Open the Activity page | +| `openclaw://history` | Open the Activity page filtered to notification history | | `openclaw://healthcheck` | Run a manual health check | | `openclaw://check-updates` | Run a manual update check | | `openclaw://logs` | Open the current tray log file | @@ -131,6 +150,26 @@ openclaw devices approve See [issue #81](https://github.com/openclaw/openclaw-windows-node/issues/81) for context on this flow. +### Setup code doesn't work + +- Make sure you paste the **entire** setup code — it's a single base64url-encoded string. +- Check for accidental leading/trailing whitespace. +- The code must be from a compatible gateway version. Try entering the gateway URL and token manually instead. + +### Connection test fails + +- Verify the gateway URL is correct (e.g., `ws://localhost:18789` for local, or the full URL for remote). +- Check that your token is valid and hasn't expired. +- If the gateway is on another machine, ensure Windows Firewall allows traffic on the gateway port. 
+- See the log at `%LOCALAPPDATA%\OpenClawTray\openclaw-tray.log` for detailed error messages. + +### Wizard shows "offline" + +The Wizard screen relies on the gateway's wizard protocol. If it shows offline: +- The gateway may not support wizard mode yet — this is fine, configuration can be done later. +- Check that the gateway is running and reachable. +- You can skip the Wizard screen and configure your gateway manually from the tray menu → Settings. + ### Settings are not saved Settings are stored at `%APPDATA%\OpenClawTray\settings.json`. If this file is corrupt, delete it and reconfigure from scratch. diff --git a/docs/TEST_COVERAGE.md b/docs/TEST_COVERAGE.md index 9247b3c9..ea653465 100644 --- a/docs/TEST_COVERAGE.md +++ b/docs/TEST_COVERAGE.md @@ -1,17 +1,17 @@ # Test Coverage Summary -**571 tests total** (478 shared + 93 tray) — all passing ✅ +**1570 tests total** (1182 shared + 388 tray) — all passing ✅ | Metric | Value | |--------|-------| -| Total Tests | 571 | -| Passing | 571 (100%) | +| Total Tests | 1570 | +| Passing | 1570 (100%) | | Failing | 0 | | Framework | xUnit 2.9.3 / .NET 10.0 | ## Test Projects -### OpenClaw.Shared.Tests — 478 tests +### OpenClaw.Shared.Tests — 1182 tests #### ModelsTests - **AgentActivityTests** (~15) — glyph mapping for all ActivityKind values, display text formatting @@ -71,29 +71,26 @@ --- -### OpenClaw.Tray.Tests — 93 tests +### OpenClaw.Tray.Tests — 388 tests -#### MenuDisplayHelperTests (~40) -- `GetStatusIcon` — emoji mapping for Connected/Disconnected/Connecting/Error states -- `GetChannelStatusIcon` — status icons for running/idle/pending/error/disconnected + case-insensitive variants -- `GetNextToggleValue` — ON↔OFF toggling, case handling -- Unknown/empty status fallback +#### Core Tray Tests -#### MenuPositionerTests (~15) -- Screen edge clamping (top-left, bottom-right) -- Taskbar-at-right scenario -- Menu positioning relative to cursor +- **MenuDisplayHelperTests** (~40) — `GetStatusIcon` emoji mapping for 
Connected/Disconnected/Connecting/Error states, `GetChannelStatusIcon` status icons for running/idle/pending/error/disconnected + case-insensitive variants, `GetNextToggleValue` ON↔OFF toggling, unknown/empty status fallback +- **MenuPositionerTests** (~15) — Screen edge clamping (top-left, bottom-right), taskbar-at-right scenario, menu positioning relative to cursor +- **SettingsRoundTripTests** (~15) — Serialization/deserialization round trips, default values on missing keys, backward compatibility with older settings formats +- **DeepLinkParserTests** (~23) — `ParseDeepLink` protocol validation, null/empty handling, subpath parsing, trailing slash stripping, query parameter extraction, URL-encoded message handling -#### SettingsRoundTripTests (~15) -- Serialization/deserialization round trips -- Default values on missing keys -- Backward compatibility with older settings formats +#### Onboarding Tests -#### DeepLinkParserTests (~23) -- `ParseDeepLink` — protocol validation, null/empty handling, subpath parsing, trailing slash stripping -- Query parameter extraction (`GetQueryParam`) -- URL-encoded message handling -- Multiple query parameters, missing keys +- **OnboardingStateTests** (19) — Page order, mode logic, route changes, wizard state persistence, completion, disposal +- **GatewayChatHelperTests** (11) — URL scheme conversion, token encoding, localhost checks, session keys +- **LocalGatewayApproverTests** (13) — IsLocalGateway for localhost/remote/edge cases +- **SetupCodeDecoderTests** (14) — Base64url decode, size limits, JSON validation, URL/token extraction +- **GatewayHealthCheckTests** (6) — Health URI building, scheme conversion, port preservation +- **SecurityValidationTests** (16) — Locale whitelist, port range, path traversal, URI scheme validation +- **WizardStepParsingTests** (12) — JSON step parsing, options, completion, sensitive fields +- **GatewayDiscoveryServiceTests** — mDNS host selection and connection URL regression coverage +- 
**LocalizationValidationTests** — locale key parity, onboarding key presence, duplicate detection, and all-or-none translation consistency --- @@ -110,6 +107,9 @@ dotnet test tests/OpenClaw.Tray.Tests # Specific test class dotnet test --filter "FullyQualifiedName~MenuDisplayHelperTests" +# Onboarding tests only +dotnet test --filter "FullyQualifiedName~Onboarding" + # Verbose output dotnet test --logger "console;verbosity=detailed" ``` @@ -120,9 +120,10 @@ dotnet test --logger "console;verbosity=detailed" - Real gateway message parsing - Concurrent event handling - File I/O and thread synchronization +- End-to-end onboarding wizard flow (WebView2 requires runtime) --- -**Last Updated**: 2026-03-18 +**Last Updated**: 2026-05-04 **Framework**: xUnit 2.9.3 / .NET 10.0 -**Status**: ✅ 571 tests passing +**Status**: ✅ 1570 tests passing diff --git a/docs/WINDOWS_NODE_TESTING.md b/docs/WINDOWS_NODE_TESTING.md index 2f60127a..2c20c295 100644 --- a/docs/WINDOWS_NODE_TESTING.md +++ b/docs/WINDOWS_NODE_TESTING.md @@ -61,6 +61,7 @@ These features need the gateway to send `node.invoke` commands: | `location.get` | Get Windows location | Uses Windows location permission/settings | | `device.info` / `device.status` | Device metadata/status | Returns host/app/locale plus battery/storage/network/uptime payloads | | `browser.proxy` | Proxy browser-control host requests | Requires Browser proxy bridge enabled, a compatible browser-control host listening on gateway port + 2, and matching browser-control auth | +| `tts.speak` | Speak text aloud | Requires Text-to-speech playback enabled in Settings; gateway mode also requires `tts.speak` in `gateway.nodes.allowCommands` | ## Capabilities Advertised @@ -72,6 +73,7 @@ When the node connects, it advertises these capabilities: - `location` - Windows.Devices.Geolocation - `device` - Host/app metadata and lightweight status - `browser` - Local `browser.proxy` bridge to a browser-control host on gateway port + 2, when enabled in Settings +- 
`tts` - Windows speech synthesis or ElevenLabs playback, when enabled in Settings ## Security Features diff --git a/docs/a2ui/grading.md b/docs/a2ui/grading.md index 0754c385..05e7c17b 100644 --- a/docs/a2ui/grading.md +++ b/docs/a2ui/grading.md @@ -262,7 +262,7 @@ fake `WindowsNodeClient`). | Per-surface theme scope | `Hosting/SurfaceHost.cs ApplyThemeToScope` | multi-surface tab views don't bleed themes | | `IA2UITelemetry` seam | `Telemetry/IA2UITelemetry.cs` | structured events instead of log scraping | | Single-handler `Func` events on `CanvasCapability` | reviewed in commit `5b9c468` | catches accidental multi-subscribe instead of silent `Delegate.Combine` | -| MCP bearer token in Settings UI | `SettingsWindow.xaml.cs` | quality-of-life for MCP setup, kept out of action payloads | +| MCP bearer token in Settings UI | `SettingsPage.xaml.cs` | quality-of-life for MCP setup, kept out of action payloads | --- diff --git a/docs/gateway-node-integration.md b/docs/gateway-node-integration.md index ee62390a..17fe3a9f 100644 --- a/docs/gateway-node-integration.md +++ b/docs/gateway-node-integration.md @@ -79,6 +79,8 @@ Add ALL needed commands to `gateway.nodes.allowCommands` in `~/.openclaw/opencla // Device metadata/status "device.info", "device.status", + // Text-to-speech playback (enable only when agent-driven audio is desired) + "tts.speak", // System (already in Windows defaults, but listed for completeness) // "system.run", // "system.run.prepare", diff --git a/openclaw-windows-node.slnx b/openclaw-windows-node.slnx index edba045c..0828de3e 100644 --- a/openclaw-windows-node.slnx +++ b/openclaw-windows-node.slnx @@ -8,12 +8,14 @@ + + @@ -23,6 +25,7 @@ + diff --git a/src/OpenClaw.Shared/Capabilities/AppCapability.cs b/src/OpenClaw.Shared/Capabilities/AppCapability.cs new file mode 100644 index 00000000..30092bac --- /dev/null +++ b/src/OpenClaw.Shared/Capabilities/AppCapability.cs @@ -0,0 +1,154 @@ +using System; +using System.Collections.Generic; +using 
System.Threading; +using System.Threading.Tasks; + +namespace OpenClaw.Shared.Capabilities; + +/// +/// App-level capability exposing navigation, status, and configuration +/// through the MCP server for programmatic testing and CLI agents. +/// +public class AppCapability : NodeCapabilityBase +{ + public override string Category => "app"; + + private static readonly string[] _commands = new[] + { + "app.navigate", + "app.status", + "app.sessions", + "app.agents", + "app.nodes", + "app.config.get", + "app.settings.get", + "app.settings.set", + "app.menu", + "app.search", + }; + + public override IReadOnlyList Commands => _commands; + + // Handler delegates — wired up by App.xaml.cs after construction. + public Func>? NavigateHandler; + public Func? StatusHandler; + public Func>? SessionsHandler; + public Func>? AgentsHandler; + public Func? NodesHandler; + public Func>? ConfigGetHandler; + public Func? SettingsGetHandler; + public Func? SettingsSetHandler; + public Func? MenuHandler; + public Func? 
SearchHandler; + + public AppCapability(IOpenClawLogger logger) : base(logger) { } + + public override async Task ExecuteAsync(NodeInvokeRequest request) + { + return request.Command switch + { + "app.navigate" => await HandleNavigate(request), + "app.status" => HandleStatus(), + "app.sessions" => await HandleSessions(request), + "app.agents" => await HandleAgents(), + "app.nodes" => HandleNodes(), + "app.config.get" => await HandleConfigGet(request), + "app.settings.get" => HandleSettingsGet(request), + "app.settings.set" => HandleSettingsSet(request), + "app.menu" => HandleMenu(), + "app.search" => HandleSearch(request), + _ => Error($"Unknown command: {request.Command}") + }; + } + + private async Task HandleNavigate(NodeInvokeRequest request) + { + var page = GetStringArg(request.Args, "page"); + if (string.IsNullOrEmpty(page)) + return Error("Missing required arg: page"); + if (NavigateHandler == null) + return Error("Navigate handler not registered"); + var result = await NavigateHandler(page); + return Success(result); + } + + private NodeInvokeResponse HandleStatus() + { + if (StatusHandler == null) + return Error("Status handler not registered"); + return Success(StatusHandler()); + } + + private async Task HandleSessions(NodeInvokeRequest request) + { + var agentId = GetStringArg(request.Args, "agentId"); + if (SessionsHandler == null) + return Error("Sessions handler not registered"); + var result = await SessionsHandler(agentId); + return Success(result); + } + + private async Task HandleAgents() + { + if (AgentsHandler == null) + return Error("Agents handler not registered"); + var result = await AgentsHandler(); + return Success(result); + } + + private NodeInvokeResponse HandleNodes() + { + if (NodesHandler == null) + return Error("Nodes handler not registered"); + return Success(NodesHandler()); + } + + private async Task HandleConfigGet(NodeInvokeRequest request) + { + var path = GetStringArg(request.Args, "path"); + if (ConfigGetHandler == null) + 
return Error("Config handler not registered"); + var result = await ConfigGetHandler(path); + return Success(result); + } + + private NodeInvokeResponse HandleSettingsGet(NodeInvokeRequest request) + { + var name = GetStringArg(request.Args, "name"); + if (string.IsNullOrEmpty(name)) + return Error("Missing required arg: name"); + if (SettingsGetHandler == null) + return Error("Settings handler not registered"); + return Success(SettingsGetHandler(name)); + } + + private NodeInvokeResponse HandleSettingsSet(NodeInvokeRequest request) + { + var name = GetStringArg(request.Args, "name"); + var value = GetStringArg(request.Args, "value"); + if (string.IsNullOrEmpty(name)) + return Error("Missing required arg: name"); + if (value == null) + return Error("Missing required arg: value"); + if (SettingsSetHandler == null) + return Error("Settings handler not registered"); + return Success(SettingsSetHandler(name, value)); + } + + private NodeInvokeResponse HandleMenu() + { + if (MenuHandler == null) + return Error("Menu handler not registered"); + return Success(MenuHandler()); + } + + private NodeInvokeResponse HandleSearch(NodeInvokeRequest request) + { + var query = GetStringArg(request.Args, "query"); + if (string.IsNullOrEmpty(query)) + return Error("Missing required arg: query"); + if (SearchHandler == null) + return Error("Search handler not registered"); + return Success(SearchHandler(query)); + } +} diff --git a/src/OpenClaw.Shared/Capabilities/DeviceCapability.cs b/src/OpenClaw.Shared/Capabilities/DeviceCapability.cs index 4a01ec88..e975f6cd 100644 --- a/src/OpenClaw.Shared/Capabilities/DeviceCapability.cs +++ b/src/OpenClaw.Shared/Capabilities/DeviceCapability.cs @@ -11,7 +11,9 @@ namespace OpenClaw.Shared.Capabilities; /// -/// Device metadata and lightweight health/status capability. +/// Device metadata and system health/status capability. +/// device.info - static device metadata (no provider needed). 
+/// device.status - rich system health data via injected IDeviceStatusProvider. /// public class DeviceCapability : NodeCapabilityBase { @@ -23,20 +25,28 @@ public class DeviceCapability : NodeCapabilityBase "device.status" ]; + private static readonly HashSet _validSections = new( + ["os", "cpu", "memory", "disk", "battery"], + StringComparer.OrdinalIgnoreCase); + + private readonly IDeviceStatusProvider? _provider; + public override IReadOnlyList Commands => _commands; - public DeviceCapability(IOpenClawLogger logger) : base(logger) + public DeviceCapability(IOpenClawLogger logger, IDeviceStatusProvider provider) + : base(logger) { + _provider = provider; } - public override Task ExecuteAsync(NodeInvokeRequest request) + public override async Task ExecuteAsync(NodeInvokeRequest request) { - return Task.FromResult(request.Command switch + return request.Command switch { "device.info" => HandleInfo(), - "device.status" => HandleStatus(), + "device.status" => await HandleStatusAsync(request), _ => Error($"Unknown command: {request.Command}") - }); + }; } private NodeInvokeResponse HandleInfo() @@ -60,29 +70,133 @@ private NodeInvokeResponse HandleInfo() }); } - private NodeInvokeResponse HandleStatus() + private async Task HandleStatusAsync(NodeInvokeRequest request) { - Logger.Info("device.status"); + if (_provider == null) + return Error("Device status provider not available"); - var storage = GetStorageStatus(Logger); - var network = GetNetworkStatus(Logger); + var sections = GetStringArrayArg(request.Args, "sections"); - return Success(new + // Reject unknown section names + var invalid = sections.Where(s => !_validSections.Contains(s)).ToArray(); + if (invalid.Length > 0) { - battery = new - { - level = (double?)null, - state = "unknown", - lowPowerModeEnabled = false - }, - thermal = new + return Error($"Unknown sections: {string.Join(", ", invalid)}. 
" + + $"Valid: {string.Join(", ", _validSections)}"); + } + + bool all = sections.Length == 0; + var result = new Dictionary + { + ["collectedAt"] = DateTime.UtcNow.ToString("o") + }; + + if (all || sections.Contains("os", StringComparer.OrdinalIgnoreCase)) + result["os"] = SafeCollect("os", () => _provider.GetOsInfo()); + + if (all || sections.Contains("cpu", StringComparer.OrdinalIgnoreCase)) + result["cpu"] = await SafeCollectAsync("cpu", () => _provider.GetCpuInfoAsync()); + + if (all || sections.Contains("memory", StringComparer.OrdinalIgnoreCase)) + result["memory"] = SafeCollect("memory", () => _provider.GetMemoryInfo()); + + if (all || sections.Contains("disk", StringComparer.OrdinalIgnoreCase)) + result["disk"] = SafeCollect("disk", () => _provider.GetDiskInfo()); + + if (all || sections.Contains("battery", StringComparer.OrdinalIgnoreCase)) + result["battery"] = SafeCollect("battery", () => WrapBatteryWithLegacyFields(_provider.GetBatteryInfo())); + + // Always ensure legacy battery fields exist for backward compatibility. + // Old contract: { level: null, state: "unknown", lowPowerModeEnabled: false } + // Covers: battery not requested (filtered out), provider threw (SafeCollect + // returned { error }), or battery is null. + { + var hasBattery = result.TryGetValue("battery", out var batteryVal) && batteryVal != null; + var isError = hasBattery && batteryVal!.GetType().GetProperty("error") != null; + + if (!hasBattery || isError) { - state = "nominal" - }, - storage, - network, - uptimeSeconds = Environment.TickCount64 / 1000.0 - }); + string? errorMsg = null; + if (isError) + { + var errProp = batteryVal!.GetType().GetProperty("error")!.GetValue(batteryVal); + errorMsg = errProp?.ToString(); + } + + result["battery"] = new + { + level = (double?)null, + state = "unknown", + lowPowerModeEnabled = false, + error = errorMsg + }; + } + } + + // Legacy fields preserved for backward compatibility with existing consumers. 
+ result["thermal"] = new { state = "nominal" }; + result["storage"] = SafeCollect("storage", () => GetStorageStatus()); + result["network"] = SafeCollect("network", () => GetNetworkStatus()); + result["uptimeSeconds"] = Environment.TickCount64 / 1000.0; + + return Success(result); + } + + /// Per-section fault tolerance: one section failing doesn't kill the whole response. + private object? SafeCollect(string section, Func collector) + { + try { return collector(); } + catch (Exception ex) + { + Logger.Warn($"device.status: {section} collection failed: {ex.Message}"); + return new { error = ex.Message }; + } + } + + private async Task SafeCollectAsync(string section, Func> collector) + { + try { return await collector(); } + catch (Exception ex) + { + Logger.Warn($"device.status: {section} collection failed: {ex.Message}"); + return new { error = ex.Message }; + } + } + + /// + /// Wraps the provider's battery result with legacy fields (level, state, lowPowerModeEnabled) + /// so old consumers that read battery.level / battery.state continue to work. + /// + private static object WrapBatteryWithLegacyFields(object providerResult) + { + // Serialize the provider result to a dictionary so we can merge legacy fields. + var json = System.Text.Json.JsonSerializer.Serialize(providerResult); + var dict = System.Text.Json.JsonSerializer.Deserialize>(json) + ?? new Dictionary(); + + // Map new fields to legacy equivalents. + double? level = null; + if (dict.TryGetValue("chargePercent", out var cp) && cp.ValueKind == System.Text.Json.JsonValueKind.Number) + level = cp.GetDouble(); + + var isCharging = dict.TryGetValue("isCharging", out var ic) + && ic.ValueKind == System.Text.Json.JsonValueKind.True; + + var state = isCharging ? "charging" : (level.HasValue ? 
"discharging" : "unknown"); + + var result = new Dictionary + { + // Legacy fields + ["level"] = level, + ["state"] = state, + ["lowPowerModeEnabled"] = false, + }; + + // Merge all new fields from provider + foreach (var kv in dict) + result[kv.Key] = kv.Value; + + return result; } private static string GetModelIdentifier() @@ -96,67 +210,59 @@ private static string GetModelIdentifier() return $"{RuntimeInformation.OSArchitecture}".ToLowerInvariant(); } - private static object GetStorageStatus(IOpenClawLogger logger) + #region Legacy helpers (backward compat) + + private static object GetStorageStatus() { - try + var root = Path.GetPathRoot(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile)) + ?? Path.GetPathRoot(AppContext.BaseDirectory) + ?? string.Empty; + var drive = !string.IsNullOrWhiteSpace(root) + ? new DriveInfo(root) + : DriveInfo.GetDrives().FirstOrDefault(d => d.IsReady); + + if (drive is { IsReady: true }) { - var root = Path.GetPathRoot(Environment.GetFolderPath(Environment.SpecialFolder.UserProfile)) - ?? Path.GetPathRoot(AppContext.BaseDirectory) - ?? string.Empty; - var drive = !string.IsNullOrWhiteSpace(root) - ? 
new DriveInfo(root) - : DriveInfo.GetDrives().FirstOrDefault(d => d.IsReady); - - if (drive is { IsReady: true }) + var totalBytes = drive.TotalSize; + var freeBytes = drive.AvailableFreeSpace; + return new { - var totalBytes = drive.TotalSize; - var freeBytes = drive.AvailableFreeSpace; - return new - { - totalBytes, - freeBytes, - usedBytes = Math.Max(0, totalBytes - freeBytes) - }; - } - } - catch (Exception ex) - { - logger.Warn($"device.status: storage status unavailable: {ex.Message}"); + totalBytes, + freeBytes, + usedBytes = Math.Max(0, totalBytes - freeBytes) + }; } - return new - { - totalBytes = 0L, - freeBytes = 0L, - usedBytes = 0L - }; + return new { totalBytes = 0L, freeBytes = 0L, usedBytes = 0L }; } - private static object GetNetworkStatus(IOpenClawLogger logger) + private static object GetNetworkStatus() { - var interfaces = Array.Empty(); + string[] interfaces; try { interfaces = NetworkInterface.GetAllNetworkInterfaces() .Where(nic => nic.OperationalStatus == OperationalStatus.Up) - .Select(MapInterfaceType) + .Select(nic => nic.NetworkInterfaceType switch + { + NetworkInterfaceType.Wireless80211 => "wifi", + NetworkInterfaceType.Ethernet + or NetworkInterfaceType.GigabitEthernet + or NetworkInterfaceType.FastEthernetFx + or NetworkInterfaceType.FastEthernetT => "wired", + NetworkInterfaceType.Ppp + or NetworkInterfaceType.Wwanpp + or NetworkInterfaceType.Wwanpp2 => "cellular", + _ => "other" + }) .Distinct(StringComparer.Ordinal) .ToArray(); } - catch (Exception ex) - { - logger.Warn($"device.status: network interfaces unavailable: {ex.Message}"); - } + catch { interfaces = []; } - var isAvailable = false; - try - { - isAvailable = NetworkInterface.GetIsNetworkAvailable(); - } - catch (Exception ex) - { - logger.Warn($"device.status: network availability unavailable: {ex.Message}"); - } + bool isAvailable; + try { isAvailable = NetworkInterface.GetIsNetworkAvailable(); } + catch { isAvailable = false; } return new { @@ -167,19 +273,5 @@ private 
static object GetNetworkStatus(IOpenClawLogger logger) }; } - private static string MapInterfaceType(NetworkInterface nic) - { - return nic.NetworkInterfaceType switch - { - NetworkInterfaceType.Wireless80211 => "wifi", - NetworkInterfaceType.Ethernet - or NetworkInterfaceType.GigabitEthernet - or NetworkInterfaceType.FastEthernetFx - or NetworkInterfaceType.FastEthernetT => "wired", - NetworkInterfaceType.Ppp - or NetworkInterfaceType.Wwanpp - or NetworkInterfaceType.Wwanpp2 => "cellular", - _ => "other" - }; - } + #endregion } diff --git a/src/OpenClaw.Shared/Capabilities/SystemCapability.cs b/src/OpenClaw.Shared/Capabilities/SystemCapability.cs index 773e492c..91d0e1af 100644 --- a/src/OpenClaw.Shared/Capabilities/SystemCapability.cs +++ b/src/OpenClaw.Shared/Capabilities/SystemCapability.cs @@ -630,13 +630,33 @@ private NodeInvokeResponse HandleExecApprovalsSet(NodeInvokeRequest request) return "Empty allow rule patterns are not permitted."; var normalized = pattern.ToLowerInvariant(); - if (normalized is "*" or "* *" or "powershell *" or "pwsh *" or "cmd *" or "cmd.exe *") + + // Catch all-wildcard patterns (e.g. *, **, ?*, * ?) that match any command. + // Strip every wildcard character and whitespace; if nothing remains the pattern + // is effectively "match everything" and must be blocked regardless of spelling. + var nonWildcardContent = normalized.Replace("*", "").Replace("?", "").Trim(); + if (string.IsNullOrEmpty(nonWildcardContent)) + return $"Broad allow rule is not permitted: {pattern}"; + + // Catch shell-prefixed blanket patterns that match all commands in a given shell + // (e.g. "powershell *" allows every PowerShell command). 
+ if (normalized is "powershell *" or "pwsh *" or "cmd *" or "cmd.exe *") return $"Broad allow rule is not permitted: {pattern}"; foreach (var dangerous in DangerousAllowPatternFragments) { if (normalized.Contains(dangerous, StringComparison.Ordinal)) return $"Dangerous allow rule is not permitted: {pattern}"; + + // Also block stem+wildcard (e.g. "rm*" bypasses "rm " because the + // fragment has a trailing space that the wildcard replaces). + var stem = dangerous.TrimEnd(); + if (stem.Length < dangerous.Length && + (normalized.Contains(stem + "*", StringComparison.Ordinal) || + normalized.Contains(stem + "?", StringComparison.Ordinal))) + { + return $"Dangerous allow rule is not permitted: {pattern}"; + } } } diff --git a/src/OpenClaw.Shared/Capabilities/TtsCapability.cs b/src/OpenClaw.Shared/Capabilities/TtsCapability.cs new file mode 100644 index 00000000..c6407828 --- /dev/null +++ b/src/OpenClaw.Shared/Capabilities/TtsCapability.cs @@ -0,0 +1,108 @@ +using System; +using System.Collections.Generic; +using System.Threading; +using System.Threading.Tasks; + +namespace OpenClaw.Shared.Capabilities; + +public sealed class TtsCapability : NodeCapabilityBase +{ + public const string SpeakCommand = "tts.speak"; + public const string WindowsProvider = "windows"; + public const string ElevenLabsProvider = "elevenlabs"; + public const int MaxTextLength = 5000; + + private static readonly string[] _commands = [SpeakCommand]; + + public override string Category => "tts"; + public override IReadOnlyList Commands => _commands; + + public event Func>? SpeakRequested; + + public TtsCapability(IOpenClawLogger logger) : base(logger) + { + } + + public static string ResolveProvider(string? requestedProvider, string? configuredProvider) + { + var provider = string.IsNullOrWhiteSpace(requestedProvider) + ? configuredProvider + : requestedProvider; + + return string.IsNullOrWhiteSpace(provider) + ? 
WindowsProvider + : provider.Trim().ToLowerInvariant(); + } + + public override Task ExecuteAsync(NodeInvokeRequest request) + => ExecuteAsync(request, CancellationToken.None); + + public override async Task ExecuteAsync( + NodeInvokeRequest request, + CancellationToken cancellationToken) + { + if (!string.Equals(request.Command, SpeakCommand, StringComparison.Ordinal)) + return Error($"Unknown command: {request.Command}"); + + var text = GetStringArg(request.Args, "text")?.Trim(); + if (string.IsNullOrWhiteSpace(text)) + return Error("Missing required text"); + if (text.Length > MaxTextLength) + return Error($"TTS text exceeds {MaxTextLength} characters."); + + if (SpeakRequested == null) + return Error("TTS speak not available"); + + var args = new TtsSpeakArgs + { + Text = text, + Provider = NormalizeOptional(GetStringArg(request.Args, "provider")), + VoiceId = NormalizeOptional(GetStringArg(request.Args, "voiceId")), + Model = NormalizeOptional(GetStringArg(request.Args, "model")), + Interrupt = GetBoolArg(request.Args, "interrupt") + }; + + Logger.Info($"tts.speak: provider={args.Provider ?? "(default)"}, chars={args.Text.Length}, interrupt={args.Interrupt}"); + + try + { + var result = await SpeakRequested(args, cancellationToken).ConfigureAwait(false); + return Success(new + { + spoken = result.Spoken, + provider = result.Provider, + contentType = result.ContentType, + durationMs = result.DurationMs + }); + } + catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested) + { + return Error("Speak canceled"); + } + catch (Exception ex) + { + Logger.Error("TTS speak failed", ex); + return Error($"Speak failed: {ex.Message}"); + } + } + + private static string? NormalizeOptional(string? value) + => string.IsNullOrWhiteSpace(value) ? null : value.Trim(); +} + +public sealed class TtsSpeakArgs +{ + public string Text { get; set; } = ""; + public string? Provider { get; set; } + public string? VoiceId { get; set; } + public string? 
Model { get; set; } + public bool Interrupt { get; set; } +} + +public sealed class TtsSpeakResult +{ + public bool Spoken { get; set; } = true; + public string Provider { get; set; } = TtsCapability.WindowsProvider; + public string? ContentType { get; set; } + public int? DurationMs { get; set; } +} diff --git a/src/OpenClaw.Shared/DeviceIdentity.cs b/src/OpenClaw.Shared/DeviceIdentity.cs index fed4fcfa..3fa66eff 100644 --- a/src/OpenClaw.Shared/DeviceIdentity.cs +++ b/src/OpenClaw.Shared/DeviceIdentity.cs @@ -3,6 +3,7 @@ using System.Security.Cryptography; using System.Text; using System.Text.Json; +using OpenClaw.Shared.Mcp; using NSec.Cryptography; namespace OpenClaw.Shared; @@ -24,6 +25,43 @@ public class DeviceIdentity public string DeviceId => _deviceId ?? throw new InvalidOperationException("Device not initialized"); public string PublicKeyBase64Url => _publicKey != null ? Base64UrlEncode(_publicKey.Export(KeyBlobFormat.RawPublicKey)) : throw new InvalidOperationException("Device not initialized"); public string? DeviceToken => _deviceToken; + + public static string? TryReadStoredDeviceToken(string dataPath, IOpenClawLogger? logger = null) + { + var keyPath = Path.Combine(dataPath, "device-key-ed25519.json"); + if (!File.Exists(keyPath)) + { + return null; + } + + try + { + using var doc = JsonDocument.Parse(File.ReadAllText(keyPath)); + if (doc.RootElement.TryGetProperty(nameof(DeviceKeyData.DeviceToken), out var deviceToken) && + deviceToken.ValueKind == JsonValueKind.String) + { + var value = deviceToken.GetString(); + return string.IsNullOrWhiteSpace(value) ? 
null : value; + } + } + catch (IOException ex) + { + logger?.Warn($"Failed to read stored device token: {ex.Message}"); + } + catch (UnauthorizedAccessException ex) + { + logger?.Warn($"Failed to read stored device token: {ex.Message}"); + } + catch (JsonException ex) + { + logger?.Warn($"Failed to read stored device token: {ex.Message}"); + } + + return null; + } + + public static bool HasStoredDeviceToken(string dataPath, IOpenClawLogger? logger = null) => + !string.IsNullOrWhiteSpace(TryReadStoredDeviceToken(dataPath, logger)); public DeviceIdentity(string dataPath, IOpenClawLogger? logger = null) { @@ -109,8 +147,11 @@ private void GenerateNew() { Directory.CreateDirectory(dir); } + if (!string.IsNullOrEmpty(dir)) + McpAuthToken.TryRestrictDataDirectoryAcl(dir); File.WriteAllText(_keyPath, JsonSerializer.Serialize(data, new JsonSerializerOptions { WriteIndented = true })); + McpAuthToken.TryRestrictSensitiveFileAcl(_keyPath); _logger.Info($"Generated new Ed25519 device identity: {_deviceId}"); } @@ -270,6 +311,9 @@ public string BuildDebugPayload(string nonce, long signedAtMs, string clientId, /// public void StoreDeviceToken(string token) { + if (string.IsNullOrWhiteSpace(token)) + throw new ArgumentException("Device token cannot be empty.", nameof(token)); + _deviceToken = token; // Update the key file with the token @@ -283,6 +327,7 @@ public void StoreDeviceToken(string token) { data.DeviceToken = token; File.WriteAllText(_keyPath, JsonSerializer.Serialize(data, new JsonSerializerOptions { WriteIndented = true })); + McpAuthToken.TryRestrictSensitiveFileAcl(_keyPath); _logger.Info("Device token stored"); } } diff --git a/src/OpenClaw.Shared/ExecApprovals/ExecApprovalV2InputValidator.cs b/src/OpenClaw.Shared/ExecApprovals/ExecApprovalV2InputValidator.cs new file mode 100644 index 00000000..b7e2f33b --- /dev/null +++ b/src/OpenClaw.Shared/ExecApprovals/ExecApprovalV2InputValidator.cs @@ -0,0 +1,137 @@ +using System.Collections.Generic; +using 
System.Text.Json; + +namespace OpenClaw.Shared.ExecApprovals; + +/// +/// Phase 1 of the V2 exec approval pipeline: structural input validation (rail 18, step 1). +/// Parses a raw NodeInvokeRequest into a ValidatedRunRequest or returns validation-failed. +/// Does not resolve executables, detect shell wrappers, or evaluate policy. +/// +public static class ExecApprovalV2InputValidator +{ + private const int DefaultTimeoutMs = 30_000; + + public static ExecApprovalV2ValidationOutcome Validate(NodeInvokeRequest request) + { + var argv = TryParseArgv(request.Args, out bool malformedCommand); + if (malformedCommand) + return Deny("malformed-command"); + if (argv == null || argv.Length == 0) + return Deny("missing-command"); + if (string.IsNullOrWhiteSpace(argv[0])) + return Deny("empty-command"); + + // cwd — optional, but empty/whitespace is a caller error; wrong type is a protocol violation + string? cwd = null; + if (request.Args.ValueKind == JsonValueKind.Object && + request.Args.TryGetProperty("cwd", out var cwdEl)) + { + if (cwdEl.ValueKind != JsonValueKind.String) + return Deny("malformed-cwd"); + var rawCwd = cwdEl.GetString(); + if (string.IsNullOrWhiteSpace(rawCwd)) + return Deny("empty-cwd"); + cwd = rawCwd; + } + + // env — must be a JSON object if present; non-string values are a protocol violation + IReadOnlyDictionary? env = null; + if (request.Args.ValueKind == JsonValueKind.Object && + request.Args.TryGetProperty("env", out var envEl)) + { + if (envEl.ValueKind != JsonValueKind.Object) + return Deny("malformed-env"); + var dict = new Dictionary(StringComparer.OrdinalIgnoreCase); + foreach (var prop in envEl.EnumerateObject()) + { + if (prop.Value.ValueKind != JsonValueKind.String) + return Deny("malformed-env"); + dict[prop.Name] = prop.Value.GetString() ?? ""; + } + env = dict; + } + + // timeoutMs / timeout — positive integer; defaults to 30 000. + // Upper-bound clamping (legacy safety limit) is enforced in the execution/policy phase, not here. 
+ var timeoutMs = DefaultTimeoutMs; + if (request.Args.ValueKind == JsonValueKind.Object) + { + if (request.Args.TryGetProperty("timeoutMs", out var tmsEl)) + { + if (tmsEl.ValueKind != JsonValueKind.Number || !tmsEl.TryGetInt32(out var v) || v <= 0) + return Deny("invalid-timeout"); + timeoutMs = v; + } + else if (request.Args.TryGetProperty("timeout", out var tEl)) + { + if (tEl.ValueKind != JsonValueKind.Number || !tEl.TryGetInt32(out var v) || v <= 0) + return Deny("invalid-timeout"); + timeoutMs = v; + } + } + + return ExecApprovalV2ValidationOutcome.Ok(new ValidatedRunRequest( + argv, + TryGetString(request.Args, "shell"), + cwd, + timeoutMs, + env, + TryGetString(request.Args, "agentId"), + TryGetString(request.Args, "sessionKey"))); + } + + private static ExecApprovalV2ValidationOutcome Deny(string reason) + => ExecApprovalV2ValidationOutcome.Fail(ExecApprovalV2Result.ValidationFailed(reason)); + + private static string[]? TryParseArgv(JsonElement args, out bool malformed) + { + malformed = false; + if (args.ValueKind != JsonValueKind.Object || + !args.TryGetProperty("command", out var cmdEl)) + return null; + + if (cmdEl.ValueKind == JsonValueKind.Array) + { + var list = new List(); + foreach (var item in cmdEl.EnumerateArray()) + { + if (item.ValueKind != JsonValueKind.String) { malformed = true; return null; } + list.Add(item.GetString() ?? ""); + } + return list.Count > 0 ? [.. list] : null; + } + + if (cmdEl.ValueKind == JsonValueKind.String) + { + var cmd = cmdEl.GetString(); + if (string.IsNullOrWhiteSpace(cmd)) return null; + + // Also merge a separate "args" array when command is a bare string. + // A non-array "args" value is a protocol violation. 
+ if (args.TryGetProperty("args", out var argsEl)) + { + if (argsEl.ValueKind != JsonValueKind.Array) { malformed = true; return null; } + var list = new List { cmd }; + foreach (var item in argsEl.EnumerateArray()) + { + if (item.ValueKind != JsonValueKind.String) { malformed = true; return null; } + list.Add(item.GetString() ?? ""); + } + return [.. list]; + } + return [cmd]; + } + + return null; + } + + private static string? TryGetString(JsonElement args, string key) + { + if (args.ValueKind != JsonValueKind.Object || + !args.TryGetProperty(key, out var el) || + el.ValueKind != JsonValueKind.String) + return null; + return el.GetString(); + } +} diff --git a/src/OpenClaw.Shared/ExecApprovals/ValidatedRunRequest.cs b/src/OpenClaw.Shared/ExecApprovals/ValidatedRunRequest.cs new file mode 100644 index 00000000..d93c9081 --- /dev/null +++ b/src/OpenClaw.Shared/ExecApprovals/ValidatedRunRequest.cs @@ -0,0 +1,62 @@ +using System.Collections.Generic; + +namespace OpenClaw.Shared.ExecApprovals; + +/// +/// Structurally-valid system.run input produced by ExecApprovalV2InputValidator. +/// Argv is guaranteed non-empty with a non-blank first element. +/// +public sealed class ValidatedRunRequest +{ + public string[] Argv { get; } + public string? Shell { get; } + public string? Cwd { get; } + public int TimeoutMs { get; } + public IReadOnlyDictionary? Env { get; } + public string? AgentId { get; } + public string? SessionKey { get; } + + internal ValidatedRunRequest( + string[] argv, + string? shell, + string? cwd, + int timeoutMs, + IReadOnlyDictionary? env, + string? agentId, + string? sessionKey) + { + Argv = argv; + Shell = shell; + Cwd = cwd; + TimeoutMs = timeoutMs; + Env = env; + AgentId = agentId; + SessionKey = sessionKey; + } +} + +/// +/// Either a ValidatedRunRequest (IsValid=true) or a typed denial (IsValid=false). +/// Produced by ExecApprovalV2InputValidator; consumed by the coordinator pipeline. 
+/// +public sealed class ExecApprovalV2ValidationOutcome +{ + public bool IsValid { get; } + public ValidatedRunRequest? Request { get; } + public ExecApprovalV2Result? Error { get; } + + private ExecApprovalV2ValidationOutcome(ValidatedRunRequest request) + { + IsValid = true; + Request = request; + } + + private ExecApprovalV2ValidationOutcome(ExecApprovalV2Result error) + { + IsValid = false; + Error = error; + } + + public static ExecApprovalV2ValidationOutcome Ok(ValidatedRunRequest r) => new(r); + public static ExecApprovalV2ValidationOutcome Fail(ExecApprovalV2Result e) => new(e); +} diff --git a/src/OpenClaw.Shared/IDeviceStatusProvider.cs b/src/OpenClaw.Shared/IDeviceStatusProvider.cs new file mode 100644 index 00000000..e4ab256e --- /dev/null +++ b/src/OpenClaw.Shared/IDeviceStatusProvider.cs @@ -0,0 +1,27 @@ +using System; +using System.Threading.Tasks; + +namespace OpenClaw.Shared; + +/// +/// Provider interface for platform-specific device status data collection. +/// Each method returns an object that will be serialized to JSON. +/// Implementations should handle their own error cases gracefully. +/// +public interface IDeviceStatusProvider : IDisposable +{ + /// OS version, architecture, machine name, uptime. + object GetOsInfo(); + + /// CPU name, logical processor count, usage percent (may be null during warm-up). + Task GetCpuInfoAsync(); + + /// Total/available memory in bytes and usage percent. + object GetMemoryInfo(); + + /// Fixed drive info: name, label, total/free bytes, usage percent, format. + object GetDiskInfo(); + + /// Battery presence, charge level, charging state, estimated time remaining. 
+ object GetBatteryInfo(); +} diff --git a/src/OpenClaw.Shared/Mcp/McpAuthToken.cs b/src/OpenClaw.Shared/Mcp/McpAuthToken.cs index 2335c638..50cda88f 100644 --- a/src/OpenClaw.Shared/Mcp/McpAuthToken.cs +++ b/src/OpenClaw.Shared/Mcp/McpAuthToken.cs @@ -96,7 +96,7 @@ public static string LoadOrCreate(string path) try { File.WriteAllText(tempPath, token, Encoding.UTF8); - TryRestrictFileAcl(tempPath); + TryRestrictSensitiveFileAcl(tempPath); File.Move(tempPath, path, overwrite: true); } catch @@ -104,7 +104,7 @@ public static string LoadOrCreate(string path) try { if (File.Exists(tempPath)) File.Delete(tempPath); } catch { } throw; } - TryRestrictFileAcl(path); + TryRestrictSensitiveFileAcl(path); return token; } @@ -127,7 +127,7 @@ public static string Reset(string path) try { File.WriteAllText(tempPath, token, Encoding.UTF8); - TryRestrictFileAcl(tempPath); + TryRestrictSensitiveFileAcl(tempPath); File.Move(tempPath, path, overwrite: true); } catch @@ -137,7 +137,7 @@ public static string Reset(string path) } // Move on Windows preserves the source's DACL; re-apply defensively in // case a future rename strategy substitutes a different file. 
- TryRestrictFileAcl(path); + TryRestrictSensitiveFileAcl(path); return token; } @@ -183,8 +183,9 @@ public static void TryRestrictDataDirectoryAcl(string dir) catch { /* best-effort; acl restriction is defense-in-depth, not load-bearing */ } } - private static void TryRestrictFileAcl(string path) + public static void TryRestrictSensitiveFileAcl(string path) { + if (string.IsNullOrEmpty(path)) return; if (!OperatingSystem.IsWindows()) return; try { RestrictFileAclWindows(path); } catch { /* see above */ } diff --git a/src/OpenClaw.Shared/Mcp/McpHttpServer.cs b/src/OpenClaw.Shared/Mcp/McpHttpServer.cs index b71225d7..a3a45eb3 100644 --- a/src/OpenClaw.Shared/Mcp/McpHttpServer.cs +++ b/src/OpenClaw.Shared/Mcp/McpHttpServer.cs @@ -89,7 +89,9 @@ public McpHttpServer(McpToolBridge bridge, int port, IOpenClawLogger logger, str _port = port; _authToken = string.IsNullOrEmpty(authToken) ? null : authToken; _listener = new HttpListener(); - // Loopback binding — not reachable from other machines. + // Loopback binding — not reachable from other machines. Use only the + // numeric host on Windows so non-elevated startup does not require a + // separate netsh http urlacl reservation for http://localhost:port/. _listener.Prefixes.Add($"http://127.0.0.1:{port}/"); } diff --git a/src/OpenClaw.Shared/Mcp/McpToolBridge.cs b/src/OpenClaw.Shared/Mcp/McpToolBridge.cs index e3d19dce..e6ce348a 100644 --- a/src/OpenClaw.Shared/Mcp/McpToolBridge.cs +++ b/src/OpenClaw.Shared/Mcp/McpToolBridge.cs @@ -1,7 +1,6 @@ using System; using System.Collections.Generic; using System.IO; -using System.Linq; using System.Text.Json; using System.Threading; using System.Threading.Tasks; @@ -168,6 +167,13 @@ private object HandleToolsList() return new { tools }; } + /// + /// The complete set of commands documented in . + /// Exposed as a stable surface so out-of-process documentation (winnode's + /// skill.md) can be drift-tested against the canonical capability surface. 
+ /// + public static IReadOnlyCollection KnownCommands => CommandDescriptions.Keys; + /// /// Per-command descriptions advertised via tools/list. Sourced from /// the OpenClaw docs (docs/nodes/index.md, docs/platforms/mac/canvas.md) and @@ -229,6 +235,32 @@ private object HandleToolsList() "Capture a still photo from a camera. Args: deviceId (string, optional — defaults to system default camera), format ('jpeg'|'png', default 'jpeg'), maxWidth (int, default 1280), quality (int 1-100, default 80). Returns { format, width, height, base64 }.", ["camera.clip"] = "Record a short clip from a camera. Args: deviceId (string, optional), durationMs (int, required, max 60000), format ('mp4'|'webm', default 'mp4'), maxWidth (int, default 1280). Returns { format, durationMs, base64 }.", + + // tts.* + ["tts.speak"] = + "Speak text aloud on the Windows node. Args: text (string, required), provider ('windows'|'elevenlabs', optional), voiceId (string, optional), model (string, optional), interrupt (bool, default false). Returns { spoken, provider, contentType, durationMs }.", + + // app.* + ["app.navigate"] = + "Navigate the companion app to a specific page (e.g., 'home', 'sessions', 'settings'). Args: page (string, required). Returns { navigated, page }.", + ["app.status"] = + "Get current connection status, node state, and gateway info. Returns { connectionStatus, nodeConnected, nodePaired, nodePendingApproval, gatewayVersion, sessionCount, nodeCount }.", + ["app.sessions"] = + "List active sessions with optional agent filter. Args: agentId (string, optional). Returns array of { Key, Status, Model, AgeText, tokens }.", + ["app.agents"] = + "List agents from the connected gateway. Returns the raw agents JSON array.", + ["app.nodes"] = + "List connected nodes and their capabilities. Returns array of { DisplayName, NodeId, IsOnline, Platform, CapabilityCount }.", + ["app.config.get"] = + "Read gateway configuration value at a dot-path. Args: path (string, optional). 
Returns the config subtree or full config.", + ["app.settings.get"] = + "Read a local app setting by name. Args: name (string, required). Returns the setting value.", + ["app.settings.set"] = + "Set a local app setting (name and value). Args: name (string, required), value (string, required). Returns { name, value }.", + ["app.menu"] = + "Get tray menu state (status, session count, node count). Returns array of menu items.", + ["app.search"] = + "Search the command palette and return matching commands. Args: query (string, required). Returns array of { Title, Subtitle, Icon }.", }; private async Task HandleToolsCallAsync(JsonElement parameters, CancellationToken cancellationToken) @@ -252,7 +284,13 @@ private async Task HandleToolsCallAsync(JsonElement parameters, Cancella } var caps = _capabilityProvider(); - var capability = caps.FirstOrDefault(c => c.CanHandle(name)); + INodeCapability? capability = null; + foreach (var c in caps) + { + if (!c.CanHandle(name)) continue; + capability = c; + break; + } if (capability == null) throw new McpToolException($"Unknown tool: {name}"); @@ -309,7 +347,7 @@ private static string WriteResult(JsonElement? id, object result) JsonSerializer.Serialize(w, result, PayloadJsonOptions); w.WriteEndObject(); } - return System.Text.Encoding.UTF8.GetString(ms.ToArray()); + return System.Text.Encoding.UTF8.GetString(ms.GetBuffer(), 0, (int)ms.Length); } private static string WriteError(JsonElement? id, int code, string message) @@ -326,7 +364,7 @@ private static string WriteError(JsonElement? 
id, int code, string message) w.WriteEndObject(); w.WriteEndObject(); } - return System.Text.Encoding.UTF8.GetString(ms.ToArray()); + return System.Text.Encoding.UTF8.GetString(ms.GetBuffer(), 0, (int)ms.Length); } /// diff --git a/src/OpenClaw.Shared/Models.cs b/src/OpenClaw.Shared/Models.cs index 0d7888a8..6c0ceece 100644 --- a/src/OpenClaw.Shared/Models.cs +++ b/src/OpenClaw.Shared/Models.cs @@ -1023,7 +1023,8 @@ public static class CommandCenterCommandGroups [ "camera.snap", "camera.clip", - "screen.record" + "screen.record", + "tts.speak" ]; public static readonly FrozenSet DangerousCommandSet = @@ -1046,7 +1047,9 @@ public static class CommandCenterCommandGroups public static readonly string[] MacNodeParityCommands = [ .. SafeCompanionCommands, - .. DangerousCommands, + "camera.snap", + "camera.clip", + "screen.record", "system.notify", "system.run", "system.which", @@ -1502,3 +1505,266 @@ internal static string FormatLargeNumber(long n) } } +// ── Agent Events ── + +/// Raw agent event from gateway broadcast. +public class AgentEventInfo +{ + public string RunId { get; set; } = ""; + public int Seq { get; set; } + public string Stream { get; set; } = ""; + public double Ts { get; set; } + public JsonElement Data { get; set; } + public string? SessionKey { get; set; } + public string? Summary { get; set; } + + public DateTime Timestamp => DateTimeOffset.FromUnixTimeMilliseconds((long)Ts).LocalDateTime; + + public string FormattedTime => Timestamp.ToString("HH:mm:ss.fff"); + + public string StreamUpper => Stream.ToUpperInvariant(); + + /// Color hex for stream badge (used by UI to create brush). 
+ public string BadgeColorHex => Stream.ToLowerInvariant() switch + { + "tool" => "#FFDC781E", // Orange + "assistant" => "#FF28A050", // Green + "error" => "#FFC83232", // Red + "lifecycle" => "#FF3C78C8", // Blue + "plan" => "#FF8C50C8", // Purple + "approval" => "#FFC8A01E", // Amber + "thinking" => "#FF648CB4", // Steel + "patch" => "#FF50A0A0", // Teal + _ => "#FF646464" // Gray + }; + + /// Human-readable summary extracted from event data. + public string SummaryLine + { + get + { + if (!string.IsNullOrEmpty(Summary)) return Summary; + try + { + var s = Stream.ToLowerInvariant(); + if (s == "tool" && Data.ValueKind == JsonValueKind.Object) + { + var name = Data.TryGetProperty("name", out var n) ? n.GetString() : null; + var phase = Data.TryGetProperty("phase", out var p) ? p.GetString() : null; + if (name != null) return phase != null ? $"🔧 {name} ({phase})" : $"🔧 {name}"; + } + if (s == "assistant" && Data.ValueKind == JsonValueKind.Object) + { + var text = Data.TryGetProperty("text", out var t) ? t.GetString() : null; + if (text != null) return text.Length > 120 ? text[..120] + "…" : text; + } + if (s == "error" && Data.ValueKind == JsonValueKind.Object) + { + var msg = Data.TryGetProperty("message", out var m) ? m.GetString() + : Data.TryGetProperty("error", out var e) ? e.GetString() : null; + if (msg != null) return $"❌ {msg}"; + } + if (s == "lifecycle" && Data.ValueKind == JsonValueKind.Object) + { + var state = Data.TryGetProperty("state", out var st) ? st.GetString() : null; + if (state != null) return $"⚡ {state}"; + } + } + catch { } + return ""; + } + } + + public bool HasSummary => !string.IsNullOrEmpty(SummaryLine); + + public string DataJson + { + get + { + try + { + return JsonSerializer.Serialize(Data, new JsonSerializerOptions { WriteIndented = true }); + } + catch + { + return Data.ToString() ?? 
"{}"; + } + } + } +} + +// ── Node/Device Pairing ── + +public class PairingRequest +{ + public string RequestId { get; set; } = ""; + public string NodeId { get; set; } = ""; + public string? DisplayName { get; set; } + public string? Platform { get; set; } + public string? Version { get; set; } + public string? RemoteIp { get; set; } + public bool IsRepair { get; set; } + public double Ts { get; set; } + + public DateTime Timestamp => DateTimeOffset.FromUnixTimeMilliseconds((long)Ts).LocalDateTime; + + public string Description + { + get + { + var lines = new List(); + lines.Add($"Node: {DisplayName ?? NodeId}"); + if (!string.IsNullOrEmpty(Platform)) lines.Add($"Platform: {Platform}"); + if (!string.IsNullOrEmpty(Version)) lines.Add($"Version: {Version}"); + if (!string.IsNullOrEmpty(RemoteIp)) lines.Add($"IP: {RemoteIp}"); + if (IsRepair) lines.Add("Repair: yes"); + return string.Join("\n", lines); + } + } +} + +public class DevicePairingRequest +{ + public string RequestId { get; set; } = ""; + public string DeviceId { get; set; } = ""; + public string? PublicKey { get; set; } + public string? DisplayName { get; set; } + public string? Platform { get; set; } + public string? ClientId { get; set; } + public string? ClientMode { get; set; } + public string? Role { get; set; } + public string[]? Scopes { get; set; } + public string? RemoteIp { get; set; } + public bool IsRepair { get; set; } + public double Ts { get; set; } + + public DateTime Timestamp => DateTimeOffset.FromUnixTimeMilliseconds((long)Ts).LocalDateTime; + + public string Description + { + get + { + var lines = new List(); + lines.Add($"Device: {DisplayName ?? 
DeviceId}"); + if (!string.IsNullOrEmpty(Platform)) lines.Add($"Platform: {Platform}"); + if (!string.IsNullOrEmpty(Role)) lines.Add($"Role: {Role}"); + if (Scopes is { Length: > 0 }) lines.Add($"Scopes: {string.Join(", ", Scopes)}"); + if (!string.IsNullOrEmpty(RemoteIp)) lines.Add($"IP: {RemoteIp}"); + if (IsRepair) lines.Add("Repair: yes"); + return string.Join("\n", lines); + } + } +} + +public class PairingListInfo +{ + public List Pending { get; set; } = new(); +} + +public class DevicePairingListInfo +{ + public List Pending { get; set; } = new(); +} + +// ── Models List ── + +public class ModelInfo +{ + public string Id { get; set; } = ""; + public string? Name { get; set; } + public string? Provider { get; set; } + public int? ContextWindow { get; set; } + public bool IsConfigured { get; set; } + + public string DisplayName => Name ?? Id; +} + +public class ModelsListInfo +{ + public List Models { get; set; } = new(); +} + +// ── Agent Info ── + +public class AgentInfo +{ + public string Id { get; set; } = ""; + public string? Name { get; set; } + public string? Emoji { get; set; } + public string? Workspace { get; set; } + public string? ModelPrimary { get; set; } + public string DisplayName => Name ?? Id; +} + +// ── Presence (connected clients/instances) ── + +public class PresenceEntry +{ + public string? Host { get; set; } + public string? Ip { get; set; } + public string? Version { get; set; } + public string? Platform { get; set; } + public string? DeviceFamily { get; set; } + public string? ModelIdentifier { get; set; } + public string? Mode { get; set; } + public int? LastInputSeconds { get; set; } + public string? Reason { get; set; } + public string[]? Tags { get; set; } + public string? Text { get; set; } + public long Ts { get; set; } + public string? DeviceId { get; set; } + public string[]? Roles { get; set; } + public string[]? Scopes { get; set; } + public string? InstanceId { get; set; } + + public string DisplayName => Host ?? 
DeviceId ?? Ip ?? "Unknown"; + public DateTime Timestamp => DateTimeOffset.FromUnixTimeSeconds(Ts).LocalDateTime; + public string PlatformLabel => Platform ?? "unknown"; + public string ModeLabel => Mode ?? "unknown"; + + public string LastSeenText + { + get + { + if (LastInputSeconds is not { } secs) return ""; + if (secs < 60) return $"{secs}s ago"; + if (secs < 3600) return $"{secs / 60}m ago"; + return $"{secs / 3600}h ago"; + } + } +} + +// ── Gateway Discovery ── + +public class DiscoveredGateway +{ + public string Id { get; set; } = ""; + public string DisplayName { get; set; } = ""; + public string? Host { get; set; } + public int Port { get; set; } + public string? LanHost { get; set; } + public string? TailnetDns { get; set; } + public bool TlsEnabled { get; set; } + public string? TlsFingerprint { get; set; } + + public string ConnectionUrl + { + get + { + var scheme = TlsEnabled ? "wss" : "ws"; + var host = Host ?? LanHost ?? "localhost"; + return $"{scheme}://{host}:{Port}"; + } + } + + public string HttpUrl + { + get + { + var scheme = TlsEnabled ? "https" : "http"; + var host = Host ?? LanHost ?? 
"localhost"; + return $"{scheme}://{host}:{Port}"; + } + } +} + diff --git a/src/OpenClaw.Shared/OpenClawGatewayClient.cs b/src/OpenClaw.Shared/OpenClawGatewayClient.cs index e662ae84..0a3e402c 100644 --- a/src/OpenClaw.Shared/OpenClawGatewayClient.cs +++ b/src/OpenClaw.Shared/OpenClawGatewayClient.cs @@ -2,6 +2,7 @@ using System.Collections.Frozen; using System.Collections.Generic; using System.IO; +using System.Linq; using System.Text; using System.Text.Json; using System.Threading; @@ -25,6 +26,13 @@ public class OpenClawGatewayClient : WebSocketClientBase "operator.approvals", "operator.pairing" ]; + private static readonly string[] s_operatorBootstrapScopes = + [ + "operator.approvals", + "operator.read", + "operator.talk.secrets", + "operator.write" + ]; private enum SignatureTokenMode { @@ -56,11 +64,26 @@ private enum SignatureTokenMode private bool _usageCostUnsupported; private bool _sessionPreviewUnsupported; private bool _nodeListUnsupported; + private bool _modelsListUnsupported; + private bool _nodePairListUnsupported; + private bool _devicePairListUnsupported; + private bool _agentsListUnsupported; + private bool _agentFilesListUnsupported; + private bool _agentFileGetUnsupported; private bool _operatorReadScopeUnavailable; private bool _pairingRequiredAwaitingApproval; private bool _authFailed; + private readonly bool _useBootstrapHandoffAuth; + + /// True when the gateway reported "pairing required" for this device. + public bool IsPairingRequired => _pairingRequiredAwaitingApproval; + + /// True when the device signature was rejected in all supported modes. + public bool IsAuthFailed => _authFailed; + private IReadOnlyList? 
_userRules; private bool _preferStructuredCategories = true; + private readonly System.Collections.Concurrent.ConcurrentDictionary> _pendingWizardResponses = new(); /// /// Controls whether structured notification metadata (Intent, Channel) takes priority @@ -74,6 +97,12 @@ private void ResetUnsupportedMethodFlags() _usageCostUnsupported = false; _sessionPreviewUnsupported = false; _nodeListUnsupported = false; + _modelsListUnsupported = false; + _nodePairListUnsupported = false; + _devicePairListUnsupported = false; + _agentsListUnsupported = false; + _agentFilesListUnsupported = false; + _agentFileGetUnsupported = false; _operatorReadScopeUnavailable = false; } @@ -129,14 +158,34 @@ protected override void OnDisposing() public event EventHandler? SessionPreviewUpdated; public event EventHandler? SessionCommandCompleted; public event EventHandler? GatewaySelfUpdated; + public event EventHandler? CronListUpdated; + public event EventHandler? CronStatusUpdated; + public event EventHandler? SkillsStatusUpdated; + public event EventHandler? ConfigUpdated; + public event EventHandler? ConfigSchemaUpdated; + + // New events for agent events, pairing, and models + public event EventHandler? AgentEventReceived; + public event EventHandler? NodePairListUpdated; + public event EventHandler? DevicePairListUpdated; + public event EventHandler? ModelsListUpdated; + public event EventHandler? PresenceUpdated; + public event EventHandler? AgentsListUpdated; + public event EventHandler? AgentFilesListUpdated; + public event EventHandler? AgentFileContentUpdated; public string? OperatorDeviceId => _operatorDeviceId; public IReadOnlyList GrantedOperatorScopes => _grantedOperatorScopes; public bool IsConnectedToGateway => IsConnected; - public OpenClawGatewayClient(string gatewayUrl, string token, IOpenClawLogger? logger = null) + public OpenClawGatewayClient( + string gatewayUrl, + string token, + IOpenClawLogger? 
logger = null, + bool useBootstrapHandoffAuth = false) : base(gatewayUrl, token, logger) { + _useBootstrapHandoffAuth = useBootstrapHandoffAuth; var dataPath = Path.Combine( Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData), "OpenClawTray"); @@ -232,11 +281,49 @@ public async Task SendChatMessageAsync(string message, string? sessionKey = null _logger.Info($"Sent chat message ({message.Length} chars)"); } + /// + /// Sends a wizard RPC request and waits for the response payload. + /// Used for wizard.start, wizard.next, wizard.cancel, wizard.status. + /// + public async Task SendWizardRequestAsync(string method, object? parameters = null, int timeoutMs = 30000) + { + if (!IsConnected) + throw new InvalidOperationException("Gateway connection is not open"); + + var requestId = Guid.NewGuid().ToString(); + var completion = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously); + _pendingWizardResponses[requestId] = completion; + TrackPendingRequest(requestId, method); + + try + { + await SendRawAsync(SerializeRequest(requestId, method, parameters)); + } + catch + { + _pendingWizardResponses.TryRemove(requestId, out _); + RemovePendingRequest(requestId); + throw; + } + + var completedTask = await Task.WhenAny(completion.Task, Task.Delay(timeoutMs, CancellationToken)); + if (completedTask != completion.Task) + { + _pendingWizardResponses.TryRemove(requestId, out _); + throw new TimeoutException($"Timed out waiting for {method} response"); + } + + return await completion.Task; + } + /// Request session list from gateway. - public async Task RequestSessionsAsync() + public async Task RequestSessionsAsync(string? 
agentId = null) { if (_operatorReadScopeUnavailable) return; - await SendTrackedRequestAsync("sessions.list"); + if (!string.IsNullOrEmpty(agentId)) + await SendTrackedRequestAsync("sessions.list", new { agentId }); + else + await SendTrackedRequestAsync("sessions.list"); } /// Request usage/context info from gateway (may not be supported on all gateways). @@ -332,6 +419,139 @@ public Task CompactSessionAsync(string key, int maxLines = 400) return TrySendTrackedRequestAsync("sessions.compact", new { key, maxLines }); } + // Cron job management + + public async Task RequestCronListAsync() + { + await SendTrackedRequestAsync("cron.list"); + } + + public async Task RequestCronStatusAsync() + { + await SendTrackedRequestAsync("cron.status"); + } + + public Task RunCronJobAsync(string jobId, bool force = true) + { + return TrySendTrackedRequestAsync("cron.run", new { jobId, force }); + } + + public Task RemoveCronJobAsync(string jobId) + { + return TrySendTrackedRequestAsync("cron.remove", new { id = jobId }); + } + + // Skills/plugin management + + public async Task RequestSkillsStatusAsync(string? agentId = null) + { + if (!string.IsNullOrEmpty(agentId)) + await SendTrackedRequestAsync("skills.status", new { agentId }); + else + await SendTrackedRequestAsync("skills.status"); + } + + public Task InstallSkillAsync(string skillId) + { + return TrySendTrackedRequestAsync("skills.install", new { id = skillId }); + } + + public Task UpdateSkillAsync(string skillId) + { + return TrySendTrackedRequestAsync("skills.update", new { id = skillId }); + } + + // Gateway config management + + public async Task RequestConfigAsync() + { + await SendTrackedRequestAsync("config.get"); + } + + public async Task RequestConfigSchemaAsync() + { + await SendTrackedRequestAsync("config.schema"); + } + + public Task SetConfigAsync(string path, object value) + { + return TrySendTrackedRequestAsync("config.set", new { path, value }); + } + + /// + /// Patch the gateway config. 
The gateway expects { raw: "full json string", baseHash: "..." }. + /// + public Task PatchConfigAsync(JsonElement fullConfig, string? baseHash) + { + var raw = fullConfig.GetRawText(); + if (baseHash != null) + return TrySendTrackedRequestAsync("config.patch", new { raw, baseHash }); + else + return TrySendTrackedRequestAsync("config.patch", new { raw }); + } + + // Agent methods + + public async Task RequestAgentsListAsync() + { + if (_agentsListUnsupported) return; + await SendTrackedRequestAsync("agents.list"); + } + + public async Task RequestAgentFilesListAsync(string agentId = "main") + { + if (_agentFilesListUnsupported) return; + await SendTrackedRequestAsync("agents.files.list", new { agentId }); + } + + public async Task RequestAgentFileGetAsync(string agentId, string name) + { + if (_agentFileGetUnsupported) return; + await SendTrackedRequestAsync("agents.files.get", new { agentId, name }); + } + + // Models list + + public async Task RequestModelsListAsync() + { + if (_modelsListUnsupported) return; + await SendTrackedRequestAsync("models.list", new { view = "configured" }); + } + + // Node/Device pairing + + public async Task RequestNodePairListAsync() + { + if (_nodePairListUnsupported) return; + await SendTrackedRequestAsync("node.pair.list"); + } + + public Task NodePairApproveAsync(string requestId) + { + return TrySendTrackedRequestAsync("node.pair.approve", new { requestId }); + } + + public Task NodePairRejectAsync(string requestId) + { + return TrySendTrackedRequestAsync("node.pair.reject", new { requestId }); + } + + public async Task RequestDevicePairListAsync() + { + if (_devicePairListUnsupported) return; + await SendTrackedRequestAsync("device.pair.list"); + } + + public Task DevicePairApproveAsync(string requestId) + { + return TrySendTrackedRequestAsync("device.pair.approve", new { requestId }); + } + + public Task DevicePairRejectAsync(string requestId) + { + return TrySendTrackedRequestAsync("device.pair.reject", new { requestId }); + 
} + /// Start a channel (telegram, whatsapp, etc). public async Task StartChannelAsync(string channelName) { @@ -384,6 +604,7 @@ private async Task SendConnectMessageAsync(string? nonce = null) { var requestId = Guid.NewGuid().ToString(); TrackPendingRequest(requestId, "connect"); + var requestedScopes = GetRequestedOperatorScopes(); var signedAt = _challengeTimestampMs ?? DateTimeOffset.UtcNow.ToUnixTimeMilliseconds(); var connectNonce = nonce ?? string.Empty; @@ -398,7 +619,7 @@ private async Task SendConnectMessageAsync(string? nonce = null) OperatorClientId, OperatorClientMode, OperatorRole, - s_operatorScopes, + requestedScopes, signatureToken) : _deviceIdentity.SignConnectPayloadV3( connectNonce, @@ -406,7 +627,7 @@ private async Task SendConnectMessageAsync(string? nonce = null) OperatorClientId, OperatorClientMode, OperatorRole, - s_operatorScopes, + requestedScopes, signatureToken, OperatorPlatform, OperatorDeviceFamily); @@ -430,11 +651,11 @@ private async Task SendConnectMessageAsync(string? nonce = null) displayName = OperatorClientDisplayName }, role = OperatorRole, - scopes = s_operatorScopes, + scopes = requestedScopes, caps = Array.Empty(), commands = Array.Empty(), permissions = new { }, - auth = new { token = _connectAuthToken }, + auth = BuildAuthPayload(), locale = "en-US", userAgent = "openclaw-windows-tray/1.0.0", device = new @@ -459,6 +680,40 @@ private async Task SendConnectMessageAsync(string? nonce = null) } } + private string[] GetRequestedOperatorScopes() => + _useBootstrapHandoffAuth && string.IsNullOrEmpty(_deviceIdentity.DeviceToken) + ? s_operatorBootstrapScopes + : s_operatorScopes; + + /// + /// Builds the auth payload for the connect handshake, matching the gateway's + /// HandshakeConnectAuth type: { token?, bootstrapToken?, deviceToken?, password? }. + /// Fresh devices send bootstrapToken for initial QR/setup-code pairing. + /// Paired devices send an explicit deviceToken. 
+ /// + private Dictionary BuildAuthPayload() + { + var auth = new Dictionary { ["token"] = _connectAuthToken }; + + if (!_useBootstrapHandoffAuth) + { + return auth; + } + + if (!string.IsNullOrEmpty(_deviceIdentity.DeviceToken)) + { + // Paired device: send explicit device token for cleaner auth path + auth["deviceToken"] = _deviceIdentity.DeviceToken; + } + else + { + // Fresh device: send bootstrap token for initial pairing + auth["bootstrapToken"] = _token; + } + + return auth; + } + private async Task SendTrackedRequestAsync(string method, object? parameters = null) { if (!IsConnected) return; @@ -649,6 +904,27 @@ private void HandleResponse(JsonElement root) return; } + // Check for pending wizard response + if (requestId != null && _pendingWizardResponses.TryRemove(requestId, out var wizardCompletion)) + { + if (root.TryGetProperty("ok", out var okWiz) && okWiz.ValueKind == JsonValueKind.False) + { + var message = TryGetErrorMessage(root) ?? "wizard request failed"; + wizardCompletion.TrySetException(new InvalidOperationException(message)); + } + else if (root.TryGetProperty("payload", out var wizPayload)) + { + // Log the payload kind for debugging + _logger.Info($"Wizard response payload kind={wizPayload.ValueKind}, raw={wizPayload.ToString()?.Substring(0, Math.Min(200, wizPayload.ToString()?.Length ?? 
0))}"); + wizardCompletion.TrySetResult(wizPayload.Clone()); + } + else + { + wizardCompletion.TrySetResult(root.Clone()); + } + return; + } + if (root.TryGetProperty("ok", out var okProp) && okProp.ValueKind == JsonValueKind.False) { @@ -691,6 +967,10 @@ private void HandleResponse(JsonElement root) _logger.Info($"Granted operator scopes: {string.Join(", ", _grantedOperatorScopes)}"); } _logger.Info($"Main session key: {_mainSessionKey}"); + + // Extract presence from snapshot + TryParsePresence(payload); + RaiseStatusChanged(ConnectionStatus.Connected); // Request initial state after handshake @@ -701,6 +981,7 @@ private void HandleResponse(JsonElement root) await RequestSessionsAsync(); await RequestUsageAsync(); await RequestNodesAsync(); + await RequestAgentsListAsync(); }); } @@ -764,6 +1045,56 @@ private bool HandleKnownResponse(string method, JsonElement payload) case "sessions.compact": ParseSessionCommandResult(method, payload); return true; + case "cron.list": + CronListUpdated?.Invoke(this, payload.Clone()); + return true; + case "cron.status": + CronStatusUpdated?.Invoke(this, payload.Clone()); + return true; + case "cron.run": + case "cron.remove": + return true; + case "skills.status": + SkillsStatusUpdated?.Invoke(this, payload.Clone()); + return true; + case "skills.install": + case "skills.update": + return true; + case "config.get": + ConfigUpdated?.Invoke(this, payload.Clone()); + return true; + case "config.schema": + ConfigSchemaUpdated?.Invoke(this, payload.Clone()); + return true; + case "config.set": + case "config.patch": + return true; + case "agents.list": + AgentsListUpdated?.Invoke(this, payload.Clone()); + return true; + case "agents.files.list": + AgentFilesListUpdated?.Invoke(this, payload.Clone()); + return true; + case "agents.files.get": + AgentFileContentUpdated?.Invoke(this, payload.Clone()); + return true; + case "models.list": + ParseModelsList(payload); + return true; + case "node.pair.list": + ParseNodePairList(payload); + 
return true; + case "node.pair.approve": + case "node.pair.reject": + _ = RequestNodePairListAsync(); + return true; + case "device.pair.list": + ParseDevicePairList(payload); + return true; + case "device.pair.approve": + case "device.pair.reject": + _ = RequestDevicePairListAsync(); + return true; default: return false; } @@ -856,6 +1187,30 @@ private void HandleRequestError(string? method, JsonElement root) _nodeListUnsupported = true; _logger.Warn("node.list unsupported on gateway"); return; + case "models.list": + _modelsListUnsupported = true; + _logger.Warn("models.list unsupported on gateway"); + return; + case "node.pair.list": + _nodePairListUnsupported = true; + _logger.Warn("node.pair.list unsupported on gateway"); + return; + case "device.pair.list": + _devicePairListUnsupported = true; + _logger.Warn("device.pair.list unsupported on gateway"); + return; + case "agents.list": + _agentsListUnsupported = true; + _logger.Warn("agents.list unsupported on gateway"); + return; + case "agents.files.list": + _agentFilesListUnsupported = true; + _logger.Warn("agents.files.list unsupported on gateway"); + return; + case "agents.files.get": + _agentFileGetUnsupported = true; + _logger.Warn("agents.files.get unsupported on gateway"); + return; } } @@ -1134,6 +1489,21 @@ private void HandleEvent(JsonElement root) case "session": HandleSessionEvent(root); break; + case "node.pair.requested": + case "node.pair.resolved": + // Refresh node pair list when pairing state changes + _ = RequestNodePairListAsync(); + break; + case "device.pair.requested": + case "device.pair.resolved": + // Refresh device pair list when pairing state changes + _ = RequestDevicePairListAsync(); + break; + case "presence": + // Presence snapshot broadcast when clients connect/disconnect + if (root.TryGetProperty("payload", out var presPayload)) + TryParsePresenceFromBroadcast(presPayload); + break; } } @@ -1163,13 +1533,33 @@ private void HandleAgentEvent(JsonElement root) { if 
(!root.TryGetProperty("payload", out var payload)) return; - // Determine session + // sessionKey is inside payload, not root var sessionKey = "unknown"; - if (root.TryGetProperty("sessionKey", out var sk)) + if (payload.TryGetProperty("sessionKey", out var sk)) sessionKey = sk.GetString() ?? "unknown"; var isMain = sessionKey == "main" || sessionKey.Contains(":main:"); - // Parse activity from stream field + // Emit raw agent event (cloned for thread safety) + try + { + var evt = new AgentEventInfo + { + RunId = payload.TryGetProperty("runId", out var rid) ? rid.GetString() ?? "" : "", + Seq = payload.TryGetProperty("seq", out var seqProp) && seqProp.ValueKind == JsonValueKind.Number ? seqProp.GetInt32() : 0, + Stream = payload.TryGetProperty("stream", out var streamProp2) ? streamProp2.GetString() ?? "" : "", + Ts = payload.TryGetProperty("ts", out var tsProp) && tsProp.ValueKind == JsonValueKind.Number ? tsProp.GetDouble() : 0, + Data = payload.TryGetProperty("data", out var dataProp) ? dataProp.Clone() : default, + SessionKey = sessionKey, + Summary = payload.TryGetProperty("summary", out var sumProp) ? 
sumProp.GetString() : null + }; + AgentEventReceived?.Invoke(this, evt); + } + catch (Exception ex) + { + _logger.Warn($"Failed to emit agent event: {ex.Message}"); + } + + // Parse activity from stream field (existing behavior) if (payload.TryGetProperty("stream", out var streamProp)) { var stream = streamProp.GetString(); @@ -1426,66 +1816,80 @@ private void ParseSessions(JsonElement sessions) SessionInfo[] snapshot; lock (_sessionsLock) { - _sessions.Clear(); - - // Handle both Array format and Object (dictionary) format + // Merge instead of clear — collect incoming keys, update/add, then remove absent + var incomingKeys = new HashSet(); + if (sessions.ValueKind == JsonValueKind.Array) { foreach (var item in sessions.EnumerateArray()) { - ParseSessionItem(item); + var key = ParseSessionItem(item); + if (key != null) incomingKeys.Add(key); } } else if (sessions.ValueKind == JsonValueKind.Object) { - // Object format: keys are session IDs, values could be session info objects or simple strings foreach (var prop in sessions.EnumerateObject()) { var sessionKey = prop.Name; - - // Skip metadata fields that aren't actual sessions + if (sessionKey is "recent" or "count" or "path" or "defaults" or "ts") continue; - - // Skip non-session keys (must look like a session key pattern) + if (!sessionKey.Equals("global", StringComparison.OrdinalIgnoreCase) && !sessionKey.Contains(':') && !sessionKey.Contains("agent") && !sessionKey.Contains("session")) continue; - - var session = new SessionInfo { Key = sessionKey }; + var item = prop.Value; - - // Detect main session from key pattern - "agent:main:main" ends with ":main" + + if (item.ValueKind == JsonValueKind.String) + { + var strVal = item.GetString() ?? 
""; + if (strVal.StartsWith("/") || strVal.Contains("/.")) + continue; + } + else if (item.ValueKind == JsonValueKind.Number) + { + continue; + } + + // Update or create session + if (!_sessions.TryGetValue(sessionKey, out var session)) + { + session = new SessionInfo { Key = sessionKey }; + } + var endsWithMain = sessionKey.EndsWith(":main"); session.IsMain = sessionKey == "main" || endsWithMain || sessionKey.Contains(":main:main"); - _logger.Debug($"Session key={sessionKey}, endsWithMain={endsWithMain}, IsMain={session.IsMain}"); - - // Value might be an object with session details or just a string status + if (item.ValueKind == JsonValueKind.Object) { - // Only override IsMain if the JSON explicitly says true if (item.TryGetProperty("isMain", out var isMain) && isMain.GetBoolean()) session.IsMain = true; PopulateSessionFromObject(session, item); } else if (item.ValueKind == JsonValueKind.String) { - // Simple string value - skip if it looks like a path (metadata) - var strVal = item.GetString() ?? ""; - if (strVal.StartsWith("/") || strVal.Contains("/.")) - continue; - session.Status = strVal; - } - else if (item.ValueKind == JsonValueKind.Number) - { - // Skip numeric values (like count) - continue; + session.Status = item.GetString() ?? ""; } - - _sessions[session.Key] = session; + + _sessions[sessionKey] = session; + incomingKeys.Add(sessionKey); + } + } + + // Remove sessions no longer present in the gateway response + { + var staleKeys = new List(); + foreach (var key in _sessions.Keys) + { + if (!incomingKeys.Contains(key)) + staleKeys.Add(key); } + foreach (var key in staleKeys) + _sessions.Remove(key); } snapshot = GetSessionListInternal(); @@ -1499,24 +1903,29 @@ private void ParseSessions(JsonElement sessions) } } - private void ParseSessionItem(JsonElement item) + private string? 
ParseSessionItem(JsonElement item) { - var session = new SessionInfo(); + var sessionKey = "unknown"; if (item.TryGetProperty("key", out var key)) - session.Key = key.GetString() ?? "unknown"; - - // Detect main from key pattern first - session.IsMain = session.Key == "main" || - session.Key.EndsWith(":main") || - session.Key.Contains(":main:main"); + sessionKey = key.GetString() ?? "unknown"; + + // Update or create + if (!_sessions.TryGetValue(sessionKey, out var session)) + { + session = new SessionInfo { Key = sessionKey }; + } + + session.IsMain = sessionKey == "main" || + sessionKey.EndsWith(":main") || + sessionKey.Contains(":main:main"); - // Only override if JSON explicitly says true if (item.TryGetProperty("isMain", out var isMain) && isMain.GetBoolean()) session.IsMain = true; PopulateSessionFromObject(session, item); _sessions[session.Key] = session; + return session.Key; } private void PopulateSessionFromObject(SessionInfo session, JsonElement item) @@ -1562,8 +1971,16 @@ private void PopulateSessionFromObject(SessionInfo session, JsonElement item) if (item.TryGetProperty("startedAt", out var started)) { - if (DateTime.TryParse(started.GetString(), out var dt)) - session.StartedAt = dt; + if (started.ValueKind == JsonValueKind.String) + { + if (DateTime.TryParse(started.GetString(), out var dt)) + session.StartedAt = dt; + } + else if (started.ValueKind == JsonValueKind.Number) + { + var ms = started.GetInt64(); + session.StartedAt = DateTimeOffset.FromUnixTimeMilliseconds(ms).LocalDateTime; + } } } @@ -2085,4 +2502,187 @@ private static string ShortenPath(string path) ? $"…/{parts[^2]}/{parts[^1]}" : parts[^1]; } + + // ── Parse methods for new features ── + + private void ParseModelsList(JsonElement payload) + { + try + { + var info = new ModelsListInfo(); + // Gateway returns { models: [...] } or just an array + var modelsArray = payload.ValueKind == JsonValueKind.Array + ? payload + : payload.TryGetProperty("models", out var m) ? 
m : default; + + if (modelsArray.ValueKind == JsonValueKind.Array) + { + foreach (var item in modelsArray.EnumerateArray()) + { + var model = new ModelInfo + { + Id = item.TryGetProperty("id", out var id) ? id.GetString() ?? "" : "", + Name = item.TryGetProperty("name", out var name) ? name.GetString() : null, + Provider = item.TryGetProperty("provider", out var prov) ? prov.GetString() : null, + ContextWindow = item.TryGetProperty("contextWindow", out var cw) && cw.ValueKind == JsonValueKind.Number ? cw.GetInt32() : null, + IsConfigured = item.TryGetProperty("configured", out var cfg) && cfg.ValueKind == JsonValueKind.True + }; + if (!string.IsNullOrEmpty(model.Id)) + info.Models.Add(model); + } + } + ModelsListUpdated?.Invoke(this, info); + } + catch (Exception ex) + { + _logger.Warn($"Failed to parse models.list: {ex.Message}"); + } + } + + private void ParseNodePairList(JsonElement payload) + { + try + { + var info = new PairingListInfo(); + var pending = payload.TryGetProperty("pending", out var p) ? p : default; + if (pending.ValueKind == JsonValueKind.Array) + { + foreach (var item in pending.EnumerateArray()) + { + info.Pending.Add(new PairingRequest + { + RequestId = item.TryGetProperty("requestId", out var rid) ? rid.GetString() ?? "" : "", + NodeId = item.TryGetProperty("nodeId", out var nid) ? nid.GetString() ?? "" : "", + DisplayName = item.TryGetProperty("displayName", out var dn) ? dn.GetString() : null, + Platform = item.TryGetProperty("platform", out var plat) ? plat.GetString() : null, + Version = item.TryGetProperty("version", out var ver) ? ver.GetString() : null, + RemoteIp = item.TryGetProperty("remoteIp", out var ip) ? ip.GetString() : null, + IsRepair = item.TryGetProperty("isRepair", out var rep) && rep.ValueKind == JsonValueKind.True, + Ts = item.TryGetProperty("ts", out var ts) && ts.ValueKind == JsonValueKind.Number ? 
ts.GetDouble() : 0 + }); + } + } + NodePairListUpdated?.Invoke(this, info); + } + catch (Exception ex) + { + _logger.Warn($"Failed to parse node.pair.list: {ex.Message}"); + } + } + + private void ParseDevicePairList(JsonElement payload) + { + try + { + var info = new DevicePairingListInfo(); + var pending = payload.TryGetProperty("pending", out var p) ? p : default; + if (pending.ValueKind == JsonValueKind.Array) + { + foreach (var item in pending.EnumerateArray()) + { + string[]? scopes = null; + if (item.TryGetProperty("scopes", out var sc) && sc.ValueKind == JsonValueKind.Array) + { + var scopeList = new List(); + foreach (var s in sc.EnumerateArray()) + if (s.GetString() is string sv) scopeList.Add(sv); + scopes = scopeList.ToArray(); + } + + info.Pending.Add(new DevicePairingRequest + { + RequestId = item.TryGetProperty("requestId", out var rid) ? rid.GetString() ?? "" : "", + DeviceId = item.TryGetProperty("deviceId", out var did) ? did.GetString() ?? "" : "", + PublicKey = item.TryGetProperty("publicKey", out var pk) ? pk.GetString() : null, + DisplayName = item.TryGetProperty("displayName", out var dn) ? dn.GetString() : null, + Platform = item.TryGetProperty("platform", out var plat) ? plat.GetString() : null, + ClientId = item.TryGetProperty("clientId", out var cid) ? cid.GetString() : null, + ClientMode = item.TryGetProperty("clientMode", out var cm) ? cm.GetString() : null, + Role = item.TryGetProperty("role", out var role) ? role.GetString() : null, + Scopes = scopes, + RemoteIp = item.TryGetProperty("remoteIp", out var ip) ? ip.GetString() : null, + IsRepair = item.TryGetProperty("isRepair", out var rep) && rep.ValueKind == JsonValueKind.True, + Ts = item.TryGetProperty("ts", out var ts) && ts.ValueKind == JsonValueKind.Number ? 
ts.GetDouble() : 0 + }); + } + } + DevicePairListUpdated?.Invoke(this, info); + } + catch (Exception ex) + { + _logger.Warn($"Failed to parse device.pair.list: {ex.Message}"); + } + } + + private void TryParsePresence(JsonElement payload) + { + try + { + if (!payload.TryGetProperty("snapshot", out var snapshot)) return; + if (!snapshot.TryGetProperty("presence", out var presenceArray)) return; + if (presenceArray.ValueKind != JsonValueKind.Array) return; + + var entries = ParsePresenceArray(presenceArray); + _logger.Info($"Parsed {entries.Length} presence entries from handshake"); + PresenceUpdated?.Invoke(this, entries); + } + catch (Exception ex) + { + _logger.Warn($"Failed to parse presence from handshake: {ex.Message}"); + } + } + + private void TryParsePresenceFromBroadcast(JsonElement payload) + { + try + { + // Broadcast may contain presence array directly or nested + var presenceArray = payload.ValueKind == JsonValueKind.Array + ? payload + : payload.TryGetProperty("presence", out var p) ? p : default; + + if (presenceArray.ValueKind != JsonValueKind.Array) return; + + var entries = ParsePresenceArray(presenceArray); + PresenceUpdated?.Invoke(this, entries); + } + catch (Exception ex) + { + _logger.Warn($"Failed to parse presence broadcast: {ex.Message}"); + } + } + + private static PresenceEntry[] ParsePresenceArray(JsonElement array) + { + var list = new List(); + foreach (var item in array.EnumerateArray()) + { + list.Add(new PresenceEntry + { + Host = item.TryGetProperty("host", out var h) ? h.GetString() : null, + Ip = item.TryGetProperty("ip", out var ip) ? ip.GetString() : null, + Version = item.TryGetProperty("version", out var v) ? v.GetString() : null, + Platform = item.TryGetProperty("platform", out var p) ? p.GetString() : null, + DeviceFamily = item.TryGetProperty("deviceFamily", out var df) ? df.GetString() : null, + ModelIdentifier = item.TryGetProperty("modelIdentifier", out var mi) ? 
mi.GetString() : null, + Mode = item.TryGetProperty("mode", out var m) ? m.GetString() : null, + LastInputSeconds = item.TryGetProperty("lastInputSeconds", out var lis) && lis.ValueKind == JsonValueKind.Number ? lis.GetInt32() : null, + Reason = item.TryGetProperty("reason", out var r) ? r.GetString() : null, + Tags = item.TryGetProperty("tags", out var t) && t.ValueKind == JsonValueKind.Array + ? t.EnumerateArray().Select(x => x.GetString() ?? "").Where(x => x.Length > 0).ToArray() + : null, + Text = item.TryGetProperty("text", out var tx) ? tx.GetString() : null, + Ts = item.TryGetProperty("ts", out var ts) && ts.ValueKind == JsonValueKind.Number ? ts.GetInt64() : 0, + DeviceId = item.TryGetProperty("deviceId", out var did) ? did.GetString() : null, + Roles = item.TryGetProperty("roles", out var roles) && roles.ValueKind == JsonValueKind.Array + ? roles.EnumerateArray().Select(x => x.GetString() ?? "").Where(x => x.Length > 0).ToArray() + : null, + Scopes = item.TryGetProperty("scopes", out var sc) && sc.ValueKind == JsonValueKind.Array + ? sc.EnumerateArray().Select(x => x.GetString() ?? "").Where(x => x.Length > 0).ToArray() + : null, + InstanceId = item.TryGetProperty("instanceId", out var iid) ? iid.GetString() : null, + }); + } + return list.ToArray(); + } } diff --git a/src/OpenClaw.Shared/SettingsData.cs b/src/OpenClaw.Shared/SettingsData.cs index 6f3fe495..f1983d88 100644 --- a/src/OpenClaw.Shared/SettingsData.cs +++ b/src/OpenClaw.Shared/SettingsData.cs @@ -34,6 +34,15 @@ public class SettingsData public bool NodeCameraEnabled { get; set; } = true; public bool NodeLocationEnabled { get; set; } = true; public bool NodeBrowserProxyEnabled { get; set; } = true; + public bool NodeTtsEnabled { get; set; } = false; + public string TtsProvider { get; set; } = "windows"; + /// + /// ElevenLabs API key storage slot. When persisted by the Windows tray's + /// SettingsManager this is an opaque dpapi:-prefixed blob, not plaintext. + /// + public string? 
TtsElevenLabsApiKey { get; set; } + public string? TtsElevenLabsModel { get; set; } + public string? TtsElevenLabsVoiceId { get; set; } /// Run the local MCP HTTP server. Independent of EnableNodeMode. public bool EnableMcpServer { get; set; } = false; /// @@ -48,6 +57,7 @@ public class SettingsData /// [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)] public bool? McpOnlyMode { get; set; } + public string? PreferredGatewayId { get; set; } public bool HasSeenActivityStreamTip { get; set; } = false; public string? SkippedUpdateTag { get; set; } public bool NotifyChatResponses { get; set; } = true; diff --git a/src/OpenClaw.Shared/WindowsNodeClient.cs b/src/OpenClaw.Shared/WindowsNodeClient.cs index 99fc46cf..a9d4acdc 100644 --- a/src/OpenClaw.Shared/WindowsNodeClient.cs +++ b/src/OpenClaw.Shared/WindowsNodeClient.cs @@ -74,7 +74,7 @@ public class WindowsNodeClient : WebSocketClientBase protected override string ClientRole => "node"; public WindowsNodeClient(string gatewayUrl, string token, string dataPath, IOpenClawLogger? logger = null, string? bootstrapToken = null) - : base(gatewayUrl, ResolveRequiredCredential(token, bootstrapToken), logger) + : base(gatewayUrl, ResolveRequiredCredential(token, bootstrapToken, dataPath), logger) { _gatewayToken = NormalizeOptionalCredential(token); _bootstrapToken = NormalizeOptionalCredential(bootstrapToken); @@ -98,7 +98,7 @@ private static string NormalizeOptionalCredential(string? credential) return string.IsNullOrWhiteSpace(credential) ? string.Empty : credential; } - private static string ResolveRequiredCredential(string? token, string? bootstrapToken) + private static string ResolveRequiredCredential(string? token, string? bootstrapToken, string dataPath) { var gatewayToken = NormalizeOptionalCredential(token); if (!string.IsNullOrEmpty(gatewayToken)) @@ -112,6 +112,12 @@ private static string ResolveRequiredCredential(string? token, string? 
bootstrap return bootstrap; } + var storedDeviceToken = DeviceIdentity.TryReadStoredDeviceToken(dataPath); + if (!string.IsNullOrEmpty(storedDeviceToken)) + { + return storedDeviceToken; + } + throw new ArgumentException("Token or bootstrap token is required.", nameof(token)); } diff --git a/src/OpenClaw.Tray.WinUI/App.xaml b/src/OpenClaw.Tray.WinUI/App.xaml index 8b705905..6c3e4cd7 100644 --- a/src/OpenClaw.Tray.WinUI/App.xaml +++ b/src/OpenClaw.Tray.WinUI/App.xaml @@ -14,6 +14,9 @@ + + +