From 04b055210b111c8a2d70bdf5c19ca4c6b0d2a479 Mon Sep 17 00:00:00 2001 From: Cameron Cooke Date: Wed, 13 May 2026 14:17:41 +0100 Subject: [PATCH] feat(ui-automation): Add rs/1 runtime automation parity Add batch execution, wait predicates, runtime snapshot refs, and screen-hash unchanged responses so agents can drive AXe with fewer process launches and less repeated snapshot output. Tighten action validation, stale-snapshot recovery, compact rendering, and fixture coverage so UI automation flows are easier for agents to use reliably. Co-Authored-By: OpenAI Codex --- CHANGELOG.md | 52 + example_projects/.xcodebuildmcp/config.yaml | 2 - .../Weather/.xcodebuildmcp/config.yaml | 2 +- example_projects/Weather/README.md | 4 +- .../Services/MockWeatherAPIClient.swift | 6 +- .../Views/Overlays/LocationPickerView.swift | 9 +- .../Weather/Views/Overlays/LocationRows.swift | 1 + .../Views/Overlays/SettingsSheetView.swift | 1 + .../Weather/WeatherTests/WeatherTests.swift | 3 + .../iOS/.xcodebuildmcp/config.yaml | 2 +- .../iOS_Calculator/.xcodebuildmcp/config.yaml | 3 +- manifests/tools/batch.yaml | 16 + manifests/tools/button.yaml | 2 +- manifests/tools/gesture.yaml | 2 +- manifests/tools/key_press.yaml | 4 +- manifests/tools/key_sequence.yaml | 4 +- manifests/tools/long_press.yaml | 6 +- manifests/tools/screenshot.yaml | 2 +- manifests/tools/snapshot_ui.yaml | 24 +- manifests/tools/swipe.yaml | 6 +- manifests/tools/tap.yaml | 6 +- manifests/tools/touch.yaml | 6 +- manifests/tools/type_text.yaml | 6 +- manifests/tools/wait_for_ui.yaml | 28 + manifests/workflows/ui-automation.yaml | 4 +- .../2.schema.json | 260 +++++ .../2.schema.json | 240 +++++ scripts/bundle-axe.sh | 41 +- .../__tests__/register-tool-commands.test.ts | 546 ++++++++++ src/cli/__tests__/schema-to-yargs.test.ts | 14 + src/cli/register-tool-commands.ts | 45 +- src/cli/schema-to-yargs.ts | 20 +- .../structured-output-schema.test.ts | 119 +++ .../__tests__/_keyboard_shortcut.test.ts | 2 + .../_keyboard_shortcut.ts 
| 2 +- .../simulator/__tests__/boot_sim.test.ts | 43 + .../__tests__/install_app_sim.test.ts | 53 + .../__tests__/launch_app_sim.test.ts | 49 +- .../simulator/__tests__/screenshot.test.ts | 109 +- .../simulator/__tests__/stop_app_sim.test.ts | 33 + src/mcp/tools/simulator/boot_sim.ts | 45 +- src/mcp/tools/simulator/install_app_sim.ts | 29 +- src/mcp/tools/simulator/launch_app_sim.ts | 31 +- src/mcp/tools/simulator/stop_app_sim.ts | 31 +- .../ui-automation/__tests__/batch.test.ts | 211 ++++ .../ui-automation/__tests__/button.test.ts | 70 +- .../ui-automation/__tests__/gesture.test.ts | 4 + .../ui-automation/__tests__/key_press.test.ts | 2 + .../__tests__/key_sequence.test.ts | 4 + .../__tests__/long_press.test.ts | 500 +++------ .../__tests__/non_streaming_progress.test.ts | 107 +- .../__tests__/runtime-snapshot.test.ts | 562 +++++++++++ .../__tests__/screenshot.test.ts | 168 +++- .../__tests__/snapshot-ui-state.test.ts | 142 +++ .../__tests__/snapshot_ui.test.ts | 632 +++++++++++- .../ui-automation/__tests__/swipe.test.ts | 626 ++++-------- .../tools/ui-automation/__tests__/tap.test.ts | 947 ++++++------------ .../ui-automation/__tests__/touch.test.ts | 709 +++---------- .../ui-automation/__tests__/type_text.test.ts | 757 +++++++------- .../__tests__/ui-action-test-helpers.ts | 88 ++ .../__tests__/wait_for_ui.test.ts | 708 +++++++++++++ src/mcp/tools/ui-automation/batch.ts | 142 +++ src/mcp/tools/ui-automation/button.ts | 23 +- src/mcp/tools/ui-automation/gesture.ts | 16 +- src/mcp/tools/ui-automation/key_press.ts | 11 +- src/mcp/tools/ui-automation/key_sequence.ts | 15 +- src/mcp/tools/ui-automation/long_press.ts | 50 +- src/mcp/tools/ui-automation/screenshot.ts | 61 +- .../ui-automation/shared/domain-result.ts | 47 +- .../ui-automation/shared/runtime-snapshot.ts | 701 +++++++++++++ .../ui-automation/shared/semantic-tap.ts | 138 +++ .../ui-automation/shared/snapshot-ui-state.ts | 145 ++- .../ui-automation/shared/wait-predicate.ts | 361 +++++++ 
src/mcp/tools/ui-automation/snapshot_ui.ts | 194 +++- src/mcp/tools/ui-automation/swipe.ts | 83 +- src/mcp/tools/ui-automation/tap.ts | 185 ++-- src/mcp/tools/ui-automation/touch.ts | 78 +- src/mcp/tools/ui-automation/type_text.ts | 109 +- src/mcp/tools/ui-automation/wait_for_ui.ts | 365 +++++++ src/rendering/render.ts | 10 +- src/runtime/__tests__/tool-invoker.test.ts | 44 + src/runtime/tool-invoker.ts | 24 +- .../long-press--error-no-simulator.txt | 8 +- .../cli/ui-automation/long-press--success.txt | 6 +- .../ui-automation/snapshot-ui--success.txt | 604 +---------- .../swipe--error-no-simulator.txt | 8 +- .../swipe--error-not-actionable.txt | 11 + .../cli/ui-automation/swipe--success.txt | 10 - .../ui-automation/tap--error-no-simulator.txt | 8 +- .../cli/ui-automation/tap--success.txt | 6 +- .../touch--error-no-simulator.txt | 8 +- .../cli/ui-automation/touch--success.txt | 6 +- .../type-text--error-no-simulator.txt | 8 +- .../type-text--error-not-actionable.txt | 11 + .../cli/ui-automation/type-text--success.txt | 6 - .../ui-automation/wait-for-ui--success.txt | 36 + .../button--error-no-simulator.json | 2 +- .../json/ui-automation/button--success.json | 2 +- .../gesture--error-no-simulator.json | 2 +- .../json/ui-automation/gesture--success.json | 2 +- .../key-press--error-no-simulator.json | 2 +- .../ui-automation/key-press--success.json | 2 +- .../key-sequence--error-no-simulator.json | 2 +- .../ui-automation/key-sequence--success.json | 2 +- .../long-press--error-no-simulator.json | 18 +- .../ui-automation/long-press--success.json | 13 +- .../snapshot-ui--error-no-simulator.json | 2 +- .../ui-automation/snapshot-ui--success.json | 517 +--------- .../swipe--error-no-simulator.json | 19 +- .../swipe--error-not-actionable.json | 27 + .../json/ui-automation/swipe--success.json | 33 - .../tap--error-no-simulator.json | 18 +- .../json/ui-automation/tap--success.json | 13 +- .../touch--error-no-simulator.json | 19 +- .../json/ui-automation/touch--success.json | 15 
+- .../type-text--error-no-simulator.json | 19 +- .../type-text--error-not-actionable.json | 27 + .../ui-automation/type-text--success.json | 17 - .../ui-automation/wait-for-ui--success.json | 44 + .../long-press--error-no-simulator.txt | 8 +- .../mcp/ui-automation/long-press--success.txt | 6 +- .../ui-automation/snapshot-ui--success.txt | 604 +---------- .../swipe--error-no-simulator.txt | 8 +- .../swipe--error-not-actionable.txt | 9 + .../mcp/ui-automation/swipe--success.txt | 8 - .../ui-automation/tap--error-no-simulator.txt | 8 +- .../mcp/ui-automation/tap--success.txt | 6 +- .../touch--error-no-simulator.txt | 8 +- .../mcp/ui-automation/touch--success.txt | 6 +- .../type-text--error-no-simulator.txt | 8 +- .../type-text--error-not-actionable.txt | 9 + .../mcp/ui-automation/type-text--success.txt | 4 - .../ui-automation/wait-for-ui--success.txt | 34 + .../__tests__/json-normalize.test.ts | 58 ++ src/snapshot-tests/json-normalize.ts | 18 +- .../suites/ui-automation-suite.ts | 98 +- src/types/domain-results.ts | 39 +- src/types/ui-snapshot.ts | 148 +++ .../structured-output-envelope.test.ts | 302 ++++++ .../__tests__/cli-text-renderer.test.ts | 492 +++++++++ src/utils/renderers/cli-text-renderer.ts | 10 +- src/utils/renderers/domain-result-text.ts | 305 +++++- src/utils/structured-output-envelope.ts | 293 +++++- 143 files changed, 10181 insertions(+), 4813 deletions(-) create mode 100644 manifests/tools/batch.yaml create mode 100644 manifests/tools/wait_for_ui.yaml create mode 100644 schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json create mode 100644 schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json create mode 100644 src/mcp/tools/ui-automation/__tests__/batch.test.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts create mode 100644 
src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts create mode 100644 src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts create mode 100644 src/mcp/tools/ui-automation/batch.ts create mode 100644 src/mcp/tools/ui-automation/shared/runtime-snapshot.ts create mode 100644 src/mcp/tools/ui-automation/shared/semantic-tap.ts create mode 100644 src/mcp/tools/ui-automation/shared/wait-predicate.ts create mode 100644 src/mcp/tools/ui-automation/wait_for_ui.ts create mode 100644 src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--error-not-actionable.txt delete mode 100644 src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--success.txt create mode 100644 src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--error-not-actionable.txt delete mode 100644 src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--success.txt create mode 100644 src/snapshot-tests/__fixtures__/cli/ui-automation/wait-for-ui--success.txt create mode 100644 src/snapshot-tests/__fixtures__/json/ui-automation/swipe--error-not-actionable.json delete mode 100644 src/snapshot-tests/__fixtures__/json/ui-automation/swipe--success.json create mode 100644 src/snapshot-tests/__fixtures__/json/ui-automation/type-text--error-not-actionable.json delete mode 100644 src/snapshot-tests/__fixtures__/json/ui-automation/type-text--success.json create mode 100644 src/snapshot-tests/__fixtures__/json/ui-automation/wait-for-ui--success.json create mode 100644 src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--error-not-actionable.txt delete mode 100644 src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--success.txt create mode 100644 src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--error-not-actionable.txt delete mode 100644 src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--success.txt create mode 100644 src/snapshot-tests/__fixtures__/mcp/ui-automation/wait-for-ui--success.txt create mode 100644 src/types/ui-snapshot.ts diff --git a/CHANGELOG.md 
b/CHANGELOG.md index da19ad9b6..c8eb64333 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,57 @@ # Changelog +## [Unreleased] + +### Added + +- Added `snapshot_ui sinceScreenHash` / CLI `--since-screen-hash` so callers can skip full runtime snapshot output when the screen hash is unchanged. +- Added `batch` for executing multiple AXe UI automation steps in one simulator session. +- Added `wait_for_ui` for polling rs/1 runtime UI snapshots until UI predicates such as existence, enabled state, focus, text, or settled layout are satisfied. `textContains` can also wait on visible text without a selector when the match is unique. + +### Fixed + +- Fixed compact runtime snapshots so top-level app and window refs are not advertised as swipe targets just because a generic descendant overflows their frame. +- Fixed `wait_for_ui` focus waits so elements that do not expose focus state return a typed recoverable error instead of timing out. +- Fixed invalid `touch` calls so structured output no longer reports a fake touch event when neither `down` nor `up` was requested. +- Fixed compact runtime snapshots so standalone `other` elements, such as keyboard suggestions, are not advertised as swipe targets unless they behave like scrollable containers. +- Fixed runtime snapshots so off-screen elements, and clipped elements whose activation point is offscreen, are not advertised as actionable targets. +- Fixed full-screen swipe gestures so app-level scroll refs avoid unsafe screen edges such as the status bar and notch area. +- Clarified runtime snapshot tips so agents know element refs are snapshot-specific and must come from the latest `snapshot_ui` or `wait_for_ui` output, and only show swipe guidance when the snapshot includes a scroll ref. 
+- Made `wait_for_ui` `textContains` matching case-insensitive so assertions survive platform text normalization such as keyboard auto-capitalization, treat duplicate exact text matches as successful presence assertions, narrow broad selectors by text before reporting ambiguity, reject `text` on non-`textContains` predicates instead of silently ignoring it, and keep recoverable-error candidates compact in structured output. +- Fixed `tap` on SwiftUI switch element refs by using a touch down/up activation instead of AXe's coordinate tap path. +- Fixed selector fallback for AXe duplicate-match diagnostics that include parenthesized match counts. +- Fixed semantic taps and text-field focusing so element refs with duplicate AXe selectors use their resolved snapshot coordinates immediately. +- Fixed bottom-clipped UI automation targets so taps, touches, and long presses use a visible activation point instead of the hidden center of the accessibility frame. +- Fixed app-level horizontal swipes so full-screen refs use a content-area y-coordinate instead of missing horizontal carousels by swiping near the hero area. +- Fixed CLI commands with `simulatorId`-only contracts so `simulatorName` session defaults are resolved to a simulator ID without adding conflicting simulator arguments to tools that already accept `simulatorName`, and fixed simulator lifecycle tools so name-only defaults resolve before simctl operations. +- Fixed `snapshot_ui` and `wait_for_ui` next steps so they use the resolved simulator ID instead of leaking `SIMULATOR_UUID` placeholders. +- Fixed the Weather example app so saved-location rows are not reused as search-result rows after editing locations. +- Fixed the Weather example app's current-location button so it selects the current saved location instead of appearing as a no-op UI automation target. +- Added a `replaceExisting` option to `type_text` so agents can replace an existing text-field value instead of accidentally appending to it. 
+- Fixed `type_text` so AXe-unsupported international/accented characters fail before focusing the field, with a clear recoverable error instead of a generic typing failure. +- Fixed `snapshot_ui` next-step guidance so the suggested tap ref prefers useful tappable controls over text fields, sheet grabbers, close buttons, and clear-search buttons. +- Fixed compact runtime snapshot JSON so target ordering matches compact text output and prioritizes useful content targets before low-value sheet chrome. +- Fixed `wait_for_ui` success output so compact text and JSON include the matched elements that satisfied the wait predicate. +- Fixed `wait_for_ui textContains` so duplicate elements with the same matching visible text satisfy presence-style assertions instead of reporting ambiguity. +- Fixed CLI `--style minimal` so final text output suppresses generated next steps for daemon-routed tools as intended. +- Fixed `snapshot_ui` next-step guidance so snapshots with no tappable targets no longer suggest tapping the first non-actionable element. +- Fixed next-step rendering for tools shared across workflows so follow-up commands prefer the workflow that produced the result instead of drifting to another workflow alias. +- Fixed `snapshot_ui` next-step guidance so calculator-style utility and operator buttons no longer outrank more useful digit/content controls. +- Fixed `snapshot_ui` compact text, JSON, and next-step guidance so already-selected segmented controls no longer outrank unselected choices. +- Fixed compact runtime snapshots and next-step guidance so sheet grabbers remain visible as low-priority targets, allowing agents to expand or dismiss sheets without outranking useful content controls. +- Fixed compact wait-match rows so static assertion matches render with `none` instead of exposing low-level long-press/touch actions as if they were primary agent actions. 
+- Fixed compact runtime snapshot ordering and next-step guidance so destructive controls such as Remove/Delete are demoted behind safer content and navigation targets. +- Clarified simulator keyboard shortcut failures when Simulator.app is running without a visible device window. +- Fixed hardware button automation so successful button presses wait briefly for system UI transitions before returning, reducing stale immediate follow-up snapshots. +- Fixed runtime snapshots so modal sheet hosts remain swipeable after the currently visible sheet content fits inside the viewport. +- Fixed `wait_for_ui` validation so unknown JSON fields are rejected instead of silently broadening waits. +- Fixed CLI numeric array flags so comma-separated values such as `--key-codes 23,18,14` are parsed as numbers instead of failing validation. +- Fixed runtime snapshots so unlabeled internal custom-action nodes, such as SpringBoard icon subviews, are no longer advertised as likely tap targets. +- Fixed AXe bundling so downloaded artifacts must report the pinned AXe version, and dirty local AXe builds require an explicit opt-in. +- Fixed runtime snapshot tips so compact output names all target-ref action tools, including `long_press` and `touch`. +- Clarified key press and key sequence tool descriptions so agents know key codes are AXe HID key codes and should prefer `type_text` for text entry. +- Clarified `wait_for_ui` timeout recovery hints so agents know selector fields match exact values and should use `textContains` for partial visible text. 
+ ## [2.5.2] ### Changed diff --git a/example_projects/.xcodebuildmcp/config.yaml b/example_projects/.xcodebuildmcp/config.yaml index 57308faa0..7b569a1d5 100644 --- a/example_projects/.xcodebuildmcp/config.yaml +++ b/example_projects/.xcodebuildmcp/config.yaml @@ -4,13 +4,11 @@ sessionDefaultsProfiles: workspacePath: ./iOS_Calculator/CalculatorApp.xcworkspace scheme: CalculatorApp simulatorName: iPhone 17 Pro - simulatorId: B38FE93D-578B-454B-BE9A-C6FA0CE5F096 simulatorPlatform: iOS Simulator ios-test: projectPath: ./iOS/MCPTest.xcodeproj scheme: MCPTest simulatorName: iPhone 17 Pro - simulatorId: B38FE93D-578B-454B-BE9A-C6FA0CE5F096 simulatorPlatform: iOS Simulator macos-test: projectPath: ./macOS/MCPTest.xcodeproj diff --git a/example_projects/Weather/.xcodebuildmcp/config.yaml b/example_projects/Weather/.xcodebuildmcp/config.yaml index 6663899fa..899ef7cf5 100644 --- a/example_projects/Weather/.xcodebuildmcp/config.yaml +++ b/example_projects/Weather/.xcodebuildmcp/config.yaml @@ -7,7 +7,7 @@ sentryDisabled: false sessionDefaults: projectPath: Weather.xcodeproj scheme: Weather - simulatorName: iPhone 17 Pro + simulatorName: iPhone 17 Pro Max setupPreferences: platforms: - iOS diff --git a/example_projects/Weather/README.md b/example_projects/Weather/README.md index 8becf8103..6879347f4 100644 --- a/example_projects/Weather/README.md +++ b/example_projects/Weather/README.md @@ -13,9 +13,7 @@ Build and run the app with XcodeBuildMCP first: Then relaunch the installed app with the mock API argument: ```bash -../../build/cli.js simulator launch-app \ - --bundle-id com.sentry.weather.Weather \ - --args=--mock-weather-api +../../build/cli.js simulator launch-app --json '{"bundleId":"com.sentry.weather.Weather","launchArgs":["--mock-weather-api"]}' ``` ## JSON fixtures diff --git a/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift b/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift index 6217871d5..730549e5e 100644 --- 
a/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift +++ b/example_projects/Weather/Weather/Services/MockWeatherAPIClient.swift @@ -29,8 +29,10 @@ struct MockWeatherAPIClient: WeatherAPIClient, Sendable { guard !trimmed.isEmpty else { return [] } let needle = trimmed.localizedLowercase - return fixtures.searchPool.filter { location in - location.name.localizedLowercase.contains(needle) + var seenLocationIDs = Set() + return (fixtures.locations + fixtures.searchPool).filter { location in + guard seenLocationIDs.insert(location.id).inserted else { return false } + return location.name.localizedLowercase.contains(needle) || location.subtitle.localizedLowercase.contains(needle) || (location.country?.localizedLowercase.contains(needle) ?? false) } diff --git a/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift b/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift index 7b643f61e..765a9f094 100644 --- a/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift +++ b/example_projects/Weather/Weather/Views/Overlays/LocationPickerView.swift @@ -103,7 +103,7 @@ struct LocationPickerView: View { } private var currentLocationButton: some View { - Button(action: {}) { + Button(action: selectCurrentLocation) { HStack(spacing: 12) { Image(systemName: "location.fill") .font(.system(size: 14)) @@ -145,6 +145,7 @@ struct LocationPickerView: View { onSelect: { select(location) }, onRemove: { remove(location) } ) + .id("saved-\(location.id)-\(isEditing)") } } else if isLoading { ForEach(0..<3, id: \.self) { _ in SearchSkeletonRow() } @@ -160,6 +161,7 @@ struct LocationPickerView: View { onPreview: { preview(location) }, onAdd: { add(location) } ) + .id("search-\(location.id)-\(isSaved(location))-\(justAddedID == location.id)") } } } @@ -229,6 +231,11 @@ struct LocationPickerView: View { justAddedID = location.id } + private func selectCurrentLocation() { + guard let currentLocation = savedLocations.first else 
{ return } + select(currentLocation) + } + private func clearAddedIndicator() async { guard let id = justAddedID else { return } try? await Task.sleep(for: .milliseconds(1_400)) diff --git a/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift b/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift index a6412cfbb..1fd30bbd3 100644 --- a/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift +++ b/example_projects/Weather/Weather/Views/Overlays/LocationRows.swift @@ -96,6 +96,7 @@ struct SearchLocationRow: View { .frame(maxWidth: .infinity, alignment: .leading) } .buttonStyle(.plain) + .accessibilityValue(saved || added ? "saved" : "not saved") VStack(alignment: .trailing, spacing: 3) { Text(WeatherUnitFormatter.temperatureString(location.temperatureC, units: units)) diff --git a/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift b/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift index a571db22e..e118c23b0 100644 --- a/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift +++ b/example_projects/Weather/Weather/Views/Overlays/SettingsSheetView.swift @@ -132,6 +132,7 @@ private struct SegmentRow: View { Button(optionLabel(option)) { selection = option } + .accessibilityValue(selection == option ? "selected" : "not selected") .font(.system(size: 13, weight: .medium)) .foregroundStyle(selection == option ? 
.black : .white) .padding(.horizontal, 14) diff --git a/example_projects/Weather/WeatherTests/WeatherTests.swift b/example_projects/Weather/WeatherTests/WeatherTests.swift index 1a8d8f9bb..c0a2ae8bb 100644 --- a/example_projects/Weather/WeatherTests/WeatherTests.swift +++ b/example_projects/Weather/WeatherTests/WeatherTests.swift @@ -47,6 +47,9 @@ struct WeatherTests { let byCountry = try await service.searchLocations(matching: "gb") #expect(byCountry.map(\.name).contains("London")) + + let savedLocationByName = try await service.searchLocations(matching: "tokyo") + #expect(savedLocationByName.contains { $0.name == "Tokyo" }) } @Test func emptySearchReturnsNoResults() async throws { diff --git a/example_projects/iOS/.xcodebuildmcp/config.yaml b/example_projects/iOS/.xcodebuildmcp/config.yaml index 568d5e4d2..ee106f9ab 100644 --- a/example_projects/iOS/.xcodebuildmcp/config.yaml +++ b/example_projects/iOS/.xcodebuildmcp/config.yaml @@ -3,7 +3,7 @@ enabledWorkflows: ['simulator', 'ui-automation', 'debugging', 'logging'] sessionDefaults: projectPath: ./MCPTest.xcodeproj scheme: MCPTest - simulatorId: B38FE93D-578B-454B-BE9A-C6FA0CE5F096 + simulatorName: iPhone 17 Pro useLatestOS: true platform: iOS Simulator bundleId: io.sentry.MCPTest diff --git a/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml b/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml index b84c72162..44458a91a 100644 --- a/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml +++ b/example_projects/iOS_Calculator/.xcodebuildmcp/config.yaml @@ -11,12 +11,11 @@ sessionDefaults: workspacePath: CalculatorApp.xcworkspace scheme: CalculatorApp configuration: Debug - simulatorId: A2C64636-37E9-4B68-B872-E7F0A82A5670 simulatorPlatform: iOS Simulator useLatestOS: true arch: arm64 suppressWarnings: false - derivedDataPath: ./iOS_Calculator/.derivedData + derivedDataPath: ./.build/DerivedData preferXcodebuild: true bundleId: io.sentry.calculatorapp simulatorName: iPhone 17 Pro diff --git 
a/manifests/tools/batch.yaml b/manifests/tools/batch.yaml new file mode 100644 index 000000000..b9209d703 --- /dev/null +++ b/manifests/tools/batch.yaml @@ -0,0 +1,16 @@ +id: batch +module: mcp/tools/ui-automation/batch +names: + mcp: batch + cli: batch +description: Execute multiple AXe UI interaction steps in one simulator session to reduce process launches. +outputSchema: + schema: xcodebuildmcp.output.ui-action-result + version: "2" +routing: + stateful: true +annotations: + title: Batch UI Actions + readOnlyHint: true + destructiveHint: false + openWorldHint: false diff --git a/manifests/tools/button.yaml b/manifests/tools/button.yaml index f2e8ee1f3..2dd5068a0 100644 --- a/manifests/tools/button.yaml +++ b/manifests/tools/button.yaml @@ -6,7 +6,7 @@ names: description: Press simulator hardware button. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "1" + version: "2" annotations: title: Hardware Button readOnlyHint: true diff --git a/manifests/tools/gesture.yaml b/manifests/tools/gesture.yaml index 15c989e38..a543f2303 100644 --- a/manifests/tools/gesture.yaml +++ b/manifests/tools/gesture.yaml @@ -6,7 +6,7 @@ names: description: Simulator gesture preset. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "1" + version: "2" annotations: title: Gesture readOnlyHint: true diff --git a/manifests/tools/key_press.yaml b/manifests/tools/key_press.yaml index 23671da20..b282b3fb4 100644 --- a/manifests/tools/key_press.yaml +++ b/manifests/tools/key_press.yaml @@ -3,10 +3,10 @@ module: mcp/tools/ui-automation/key_press names: mcp: key_press cli: key-press -description: Press key by keycode. +description: Press one hardware key using an AXe HID key code. Prefer type_text for text entry. Common values include 40 Return/Enter, 42 Backspace, 43 Tab, and 44 Space. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "1" + version: "2" annotations: title: Key Press readOnlyHint: true diff --git a/manifests/tools/key_sequence.yaml b/manifests/tools/key_sequence.yaml index 3ef142f7c..9b2cb8bf3 100644 --- a/manifests/tools/key_sequence.yaml +++ b/manifests/tools/key_sequence.yaml @@ -3,10 +3,10 @@ module: mcp/tools/ui-automation/key_sequence names: mcp: key_sequence cli: key-sequence -description: Press a sequence of keys by their keycodes. +description: Press hardware keys using AXe HID key codes. Prefer type_text for text entry. Common values include 40 Return/Enter, 42 Backspace, 43 Tab, and 44 Space. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "1" + version: "2" annotations: title: Key Sequence readOnlyHint: true diff --git a/manifests/tools/long_press.yaml b/manifests/tools/long_press.yaml index b1e5a3ab9..7aee1112e 100644 --- a/manifests/tools/long_press.yaml +++ b/manifests/tools/long_press.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/long_press names: mcp: long_press cli: long-press -description: Long press at coords. +description: Long press a UI element by elementRef from a current rs/1 runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "1" + version: "2" +routing: + stateful: true annotations: title: Long Press readOnlyHint: true diff --git a/manifests/tools/screenshot.yaml b/manifests/tools/screenshot.yaml index 22c0dd0a9..9b42550f7 100644 --- a/manifests/tools/screenshot.yaml +++ b/manifests/tools/screenshot.yaml @@ -6,7 +6,7 @@ names: description: Capture screenshot. 
outputSchema: schema: xcodebuildmcp.output.capture-result - version: "1" + version: "2" annotations: title: Screenshot readOnlyHint: true diff --git a/manifests/tools/snapshot_ui.yaml b/manifests/tools/snapshot_ui.yaml index 6bcedf121..5d7622195 100644 --- a/manifests/tools/snapshot_ui.yaml +++ b/manifests/tools/snapshot_ui.yaml @@ -3,28 +3,12 @@ module: mcp/tools/ui-automation/snapshot_ui names: mcp: snapshot_ui cli: snapshot-ui -description: Print view hierarchy with precise view coordinates (x, y, width, height) for visible elements. +description: Capture a semantic rs/1 runtime UI snapshot with stable elementRef targets for UI automation. outputSchema: schema: xcodebuildmcp.output.capture-result - version: "1" -nextSteps: - - label: Refresh after layout changes - toolId: snapshot_ui - params: - simulatorId: SIMULATOR_UUID - when: success - - label: Tap on element - toolId: tap - params: - simulatorId: SIMULATOR_UUID - x: 0 - y: 0 - when: success - - label: Take screenshot for verification - toolId: screenshot - params: - simulatorId: SIMULATOR_UUID - when: success + version: "2" +routing: + stateful: true annotations: title: Snapshot UI readOnlyHint: true diff --git a/manifests/tools/swipe.yaml b/manifests/tools/swipe.yaml index a21c3808d..6d0c00b03 100644 --- a/manifests/tools/swipe.yaml +++ b/manifests/tools/swipe.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/swipe names: mcp: swipe cli: swipe -description: Swipe between points. +description: Swipe within a UI element by withinElementRef and direction from a current rs/1 runtime snapshot. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "1" + version: "2" +routing: + stateful: true annotations: title: Swipe readOnlyHint: true diff --git a/manifests/tools/tap.yaml b/manifests/tools/tap.yaml index 39b36ba9a..6fdff7fc2 100644 --- a/manifests/tools/tap.yaml +++ b/manifests/tools/tap.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/tap names: mcp: tap cli: tap -description: Tap UI element by accessibility id/label (recommended) or coordinates as fallback. +description: Tap a UI element by elementRef from a current rs/1 runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "1" + version: "2" +routing: + stateful: true annotations: title: Tap readOnlyHint: true diff --git a/manifests/tools/touch.yaml b/manifests/tools/touch.yaml index a6a25d4b4..3849c5bc8 100644 --- a/manifests/tools/touch.yaml +++ b/manifests/tools/touch.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/touch names: mcp: touch cli: touch -description: Touch down/up at coords. +description: Send touch down/up events to a UI element by elementRef from a current rs/1 runtime snapshot. outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "1" + version: "2" +routing: + stateful: true annotations: title: Touch readOnlyHint: true diff --git a/manifests/tools/type_text.yaml b/manifests/tools/type_text.yaml index 103e56437..86c825668 100644 --- a/manifests/tools/type_text.yaml +++ b/manifests/tools/type_text.yaml @@ -3,10 +3,12 @@ module: mcp/tools/ui-automation/type_text names: mcp: type_text cli: type-text -description: Type text. +description: Type text into a UI element by elementRef from a current rs/1 runtime snapshot, optionally replacing existing field contents. 
outputSchema: schema: xcodebuildmcp.output.ui-action-result - version: "1" + version: "2" +routing: + stateful: true annotations: title: Type Text readOnlyHint: true diff --git a/manifests/tools/wait_for_ui.yaml b/manifests/tools/wait_for_ui.yaml new file mode 100644 index 000000000..4ae62a0bc --- /dev/null +++ b/manifests/tools/wait_for_ui.yaml @@ -0,0 +1,28 @@ +id: wait_for_ui +module: mcp/tools/ui-automation/wait_for_ui +names: + mcp: wait_for_ui + cli: wait-for-ui +description: Poll rs/1 runtime UI snapshots until a selector-based UI predicate, selector-free textContains predicate, or selector-free settled predicate is satisfied. Select with elementRef, identifier, label, role, or value when a selector is needed. +outputSchema: + schema: xcodebuildmcp.output.capture-result + version: "2" +routing: + stateful: true +nextSteps: + - label: Refresh runtime snapshot + toolId: snapshot_ui + params: + simulatorId: SIMULATOR_UUID + when: success + - label: Wait again + toolId: wait_for_ui + params: + simulatorId: SIMULATOR_UUID + predicate: settled + when: success +annotations: + title: Wait for UI + readOnlyHint: true + destructiveHint: false + openWorldHint: false diff --git a/manifests/workflows/ui-automation.yaml b/manifests/workflows/ui-automation.yaml index c11e5dd72..6d8dd1f26 100644 --- a/manifests/workflows/ui-automation.yaml +++ b/manifests/workflows/ui-automation.yaml @@ -3,6 +3,9 @@ title: UI Automation description: UI automation and accessibility testing tools for iOS simulators. Perform gestures, interactions, screenshots, and UI analysis for automated testing workflows. 
targetPlatforms: [iOS] tools: + - snapshot_ui + - wait_for_ui + - batch - tap - touch - long_press @@ -13,4 +16,3 @@ tools: - key_sequence - type_text - screenshot - - snapshot_ui diff --git a/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json new file mode 100644 index 000000000..a6ddb046b --- /dev/null +++ b/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json @@ -0,0 +1,260 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://xcodebuildmcp.com/schemas/structured-output/xcodebuildmcp.output.capture-result/2.schema.json", + "type": "object", + "additionalProperties": false, + "allOf": [ + { + "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/errorConsistency" + } + ], + "$defs": { + "frame": { + "type": "object", + "additionalProperties": false, + "properties": { + "x": { "type": "number" }, + "y": { "type": "number" }, + "width": { "type": "number" }, + "height": { "type": "number" } + }, + "required": ["x", "y", "width", "height"] + }, + "runtimeActionName": { + "enum": ["tap", "typeText", "longPress", "touch", "swipeWithin"] + }, + "runtimeElementRole": { + "enum": [ + "application", + "button", + "cell", + "image", + "keyboard-key", + "list", + "menu", + "other", + "scroll-view", + "slider", + "switch", + "tab", + "text", + "text-field", + "window" + ] + }, + "runtimeElementState": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { "type": "boolean" }, + "focused": { "type": "boolean" }, + "selected": { "type": "boolean" }, + "visible": { "type": "boolean" } + } + }, + "runtimeElement": { + "type": "object", + "additionalProperties": false, + "properties": { + "ref": { "type": "string", "pattern": "^e[1-9][0-9]*$" }, + "role": { "$ref": "#/$defs/runtimeElementRole" }, + "label": { "type": "string" }, + "value": { "type": 
"string" }, + "identifier": { "type": "string" }, + "frame": { "$ref": "#/$defs/frame" }, + "state": { "$ref": "#/$defs/runtimeElementState" }, + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeActionName" } + } + }, + "required": ["ref", "frame", "actions"] + }, + "runtimeActionHint": { + "type": "object", + "additionalProperties": false, + "properties": { + "action": { "$ref": "#/$defs/runtimeActionName" }, + "elementRef": { "type": "string", "pattern": "^e[1-9][0-9]*$" }, + "label": { "type": "string" } + }, + "required": ["action", "elementRef"] + }, + "runtimeSnapshot": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot" }, + "protocol": { "const": "rs/1" }, + "simulatorId": { "type": "string" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "capturedAtMs": { "type": "integer", "minimum": 0 }, + "expiresAtMs": { "type": "integer", "minimum": 0 }, + "elements": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeElement" } + }, + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeActionHint" } + } + }, + "required": [ + "type", + "protocol", + "simulatorId", + "screenHash", + "seq", + "capturedAtMs", + "expiresAtMs", + "elements", + "actions" + ] + }, + "compactRuntimeSnapshot": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot" }, + "rs": { "const": "1" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "count": { "type": "integer", "minimum": 0 }, + "targets": { + "type": "array", + "items": { "type": "string" } + }, + "scroll": { + "type": "array", + "items": { "type": "string" } + }, + "udid": { "type": "string" } + }, + "required": ["type", "rs", "screenHash", "seq", "count", "targets", "scroll", "udid"] + }, + "runtimeSnapshotUnchanged": { + "type": "object", + 
"additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot-unchanged" }, + "protocol": { "const": "rs/1" }, + "simulatorId": { "type": "string" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "protocol", "simulatorId", "screenHash", "seq"] + }, + "compactRuntimeSnapshotUnchanged": { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "runtime-snapshot-unchanged" }, + "rs": { "const": "1" }, + "screenHash": { "type": "string", "minLength": 1 }, + "seq": { "type": "integer", "minimum": 0 }, + "unchanged": { "const": true }, + "udid": { "type": "string" } + }, + "required": ["type", "rs", "screenHash", "seq", "unchanged", "udid"] + }, + "waitPredicate": { + "enum": ["exists", "gone", "enabled", "focused", "textContains", "settled"] + }, + "waitMatch": { + "type": "object", + "additionalProperties": false, + "properties": { + "predicate": { "$ref": "#/$defs/waitPredicate" }, + "matches": { + "type": "array", + "items": { + "oneOf": [{ "$ref": "#/$defs/runtimeElement" }, { "type": "string" }] + } + } + }, + "required": ["predicate", "matches"] + }, + "recoverableUiError": { + "type": "object", + "additionalProperties": false, + "properties": { + "code": { + "enum": [ + "SNAPSHOT_MISSING", + "SNAPSHOT_EXPIRED", + "SNAPSHOT_PARSE_FAILED", + "ELEMENT_REF_NOT_FOUND", + "TARGET_NOT_FOUND", + "TARGET_AMBIGUOUS", + "TARGET_NOT_ACTIONABLE", + "WAIT_TIMEOUT", + "UI_STATE_CHANGED", + "ACTION_FAILED" + ] + }, + "message": { "type": "string" }, + "recoveryHint": { "type": "string" }, + "elementRef": { "type": "string" }, + "candidates": { + "type": "array", + "items": { + "oneOf": [{ "$ref": "#/$defs/runtimeElement" }, { "type": "string" }] + } + }, + "snapshotAgeMs": { "type": "integer", "minimum": 0 }, + "timeoutMs": { "type": "integer", "minimum": 0 } + }, + "required": ["code", "message", "recoveryHint"] + } + }, + "properties": 
{ + "schema": { "const": "xcodebuildmcp.output.capture-result" }, + "schemaVersion": { "const": "2" }, + "didError": { "type": "boolean" }, + "error": { "type": ["string", "null"] }, + "data": { + "type": "object", + "additionalProperties": false, + "properties": { + "summary": { + "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/statusSummary" + }, + "artifacts": { + "type": "object", + "additionalProperties": false, + "properties": { + "simulatorId": { "type": "string" }, + "screenshotPath": { "type": "string" } + }, + "required": ["simulatorId"] + }, + "capture": { + "oneOf": [ + { + "type": "object", + "additionalProperties": false, + "properties": { + "format": { "type": "string" }, + "width": { "type": "integer", "minimum": 0 }, + "height": { "type": "integer", "minimum": 0 } + }, + "required": ["format", "width", "height"] + }, + { "$ref": "#/$defs/runtimeSnapshot" }, + { "$ref": "#/$defs/compactRuntimeSnapshot" }, + { "$ref": "#/$defs/runtimeSnapshotUnchanged" }, + { "$ref": "#/$defs/compactRuntimeSnapshotUnchanged" } + ] + }, + "diagnostics": { + "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/basicDiagnostics" + }, + "uiError": { "$ref": "#/$defs/recoverableUiError" }, + "waitMatch": { "$ref": "#/$defs/waitMatch" } + }, + "required": ["summary", "artifacts"] + } + }, + "required": ["schema", "schemaVersion", "didError", "error", "data"] +} diff --git a/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json new file mode 100644 index 000000000..5ec9e99d1 --- /dev/null +++ b/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json @@ -0,0 +1,240 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://xcodebuildmcp.com/schemas/structured-output/xcodebuildmcp.output.ui-action-result/2.schema.json", + "type": "object", + 
"additionalProperties": false, + "allOf": [ + { + "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/errorConsistency" + } + ], + "$defs": { + "frame": { + "type": "object", + "additionalProperties": false, + "properties": { + "x": { "type": "number" }, + "y": { "type": "number" }, + "width": { "type": "number" }, + "height": { "type": "number" } + }, + "required": ["x", "y", "width", "height"] + }, + "runtimeActionName": { + "enum": ["tap", "typeText", "longPress", "touch", "swipeWithin"] + }, + "runtimeElementRole": { + "enum": [ + "application", + "button", + "cell", + "image", + "keyboard-key", + "list", + "menu", + "other", + "scroll-view", + "slider", + "switch", + "tab", + "text", + "text-field", + "window" + ] + }, + "runtimeElementState": { + "type": "object", + "additionalProperties": false, + "properties": { + "enabled": { "type": "boolean" }, + "focused": { "type": "boolean" }, + "selected": { "type": "boolean" }, + "visible": { "type": "boolean" } + } + }, + "runtimeElement": { + "type": "object", + "additionalProperties": false, + "properties": { + "ref": { "type": "string", "pattern": "^e[1-9][0-9]*$" }, + "role": { "$ref": "#/$defs/runtimeElementRole" }, + "label": { "type": "string" }, + "value": { "type": "string" }, + "identifier": { "type": "string" }, + "frame": { "$ref": "#/$defs/frame" }, + "state": { "$ref": "#/$defs/runtimeElementState" }, + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/runtimeActionName" } + } + }, + "required": ["ref", "frame", "actions"] + }, + "direction": { + "enum": ["up", "down", "left", "right"] + }, + "recoverableUiError": { + "type": "object", + "additionalProperties": false, + "properties": { + "code": { + "enum": [ + "SNAPSHOT_MISSING", + "SNAPSHOT_EXPIRED", + "SNAPSHOT_PARSE_FAILED", + "ELEMENT_REF_NOT_FOUND", + "TARGET_NOT_FOUND", + "TARGET_AMBIGUOUS", + "TARGET_NOT_ACTIONABLE", + "WAIT_TIMEOUT", + "UI_STATE_CHANGED", + "ACTION_FAILED" + ] + }, + 
"message": { "type": "string" }, + "recoveryHint": { "type": "string" }, + "elementRef": { "type": "string" }, + "candidates": { + "type": "array", + "items": { + "oneOf": [{ "$ref": "#/$defs/runtimeElement" }, { "type": "string" }] + } + }, + "snapshotAgeMs": { "type": "integer", "minimum": 0 }, + "timeoutMs": { "type": "integer", "minimum": 0 } + }, + "required": ["code", "message", "recoveryHint"] + } + }, + "properties": { + "schema": { "const": "xcodebuildmcp.output.ui-action-result" }, + "schemaVersion": { "const": "2" }, + "didError": { "type": "boolean" }, + "error": { "type": ["string", "null"] }, + "data": { + "type": "object", + "additionalProperties": false, + "properties": { + "summary": { + "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/statusSummary" + }, + "action": { + "oneOf": [ + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "tap" }, + "elementRef": { "type": "string" } + }, + "required": ["type", "elementRef"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "swipe" }, + "withinElementRef": { "type": "string" }, + "direction": { "$ref": "#/$defs/direction" }, + "durationSeconds": { "type": "number", "minimum": 0 } + }, + "required": ["type", "withinElementRef", "direction"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "touch" }, + "elementRef": { "type": "string" }, + "event": { "type": "string" } + }, + "required": ["type", "elementRef"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "long-press" }, + "elementRef": { "type": "string" }, + "durationMs": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "elementRef", "durationMs"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "button" }, + "button": { "type": "string" } + }, + 
"required": ["type", "button"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "gesture" }, + "gesture": { "type": "string" } + }, + "required": ["type", "gesture"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "type-text" }, + "elementRef": { "type": "string" }, + "textLength": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "elementRef"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "key-press" }, + "keyCode": { "type": "integer", "minimum": 0 } + }, + "required": ["type", "keyCode"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "key-sequence" }, + "keyCodes": { + "type": "array", + "items": { "type": "integer", "minimum": 0 } + } + }, + "required": ["type", "keyCodes"] + }, + { + "type": "object", + "additionalProperties": false, + "properties": { + "type": { "const": "batch" }, + "stepCount": { "type": "integer", "minimum": 1 } + }, + "required": ["type", "stepCount"] + } + ] + }, + "artifacts": { + "type": "object", + "additionalProperties": false, + "properties": { + "simulatorId": { "type": "string" } + }, + "required": ["simulatorId"] + }, + "diagnostics": { + "$ref": "https://xcodebuildmcp.com/schemas/structured-output/_defs/common.schema.json#/$defs/basicDiagnostics" + }, + "uiError": { "$ref": "#/$defs/recoverableUiError" } + }, + "required": ["summary", "action", "artifacts"] + } + }, + "required": ["schema", "schemaVersion", "didError", "error", "data"] +} diff --git a/scripts/bundle-axe.sh b/scripts/bundle-axe.sh index eb15c664b..c692c32a8 100755 --- a/scripts/bundle-axe.sh +++ b/scripts/bundle-axe.sh @@ -94,10 +94,17 @@ else echo "📥 Downloading latest AXe release from GitHub..." 
- AXE_RELEASE_BASE_URL="https://github.com/cameroncooke/AXe/releases/download/v${PINNED_AXE_VERSION}" - AXE_HOMEBREW_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-homebrew-v${PINNED_AXE_VERSION}.tar.gz" - AXE_UNIVERSAL_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-v${PINNED_AXE_VERSION}-universal.tar.gz" - AXE_LEGACY_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-v${PINNED_AXE_VERSION}.tar.gz" + if [[ "$PINNED_AXE_VERSION" == staging-* ]]; then + AXE_RELEASE_TAG="$PINNED_AXE_VERSION" + AXE_ASSET_VERSION="$PINNED_AXE_VERSION" + else + AXE_RELEASE_TAG="v${PINNED_AXE_VERSION}" + AXE_ASSET_VERSION="v${PINNED_AXE_VERSION}" + fi + AXE_RELEASE_BASE_URL="https://github.com/cameroncooke/AXe/releases/download/${AXE_RELEASE_TAG}" + AXE_HOMEBREW_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-homebrew-${AXE_ASSET_VERSION}.tar.gz" + AXE_UNIVERSAL_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-${AXE_ASSET_VERSION}-universal.tar.gz" + AXE_LEGACY_URL="${AXE_RELEASE_BASE_URL}/AXe-macOS-${AXE_ASSET_VERSION}.tar.gz" # Create temp directory mkdir -p "$AXE_TEMP_DIR" @@ -258,7 +265,8 @@ if [ "$OS_NAME" = "Darwin" ]; then ad_hoc_sign_bundled_axe_assets fi - if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ]; then + if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ] || [ "$AXE_ARCHIVE_FLAVOR" = "local-signed" ]; then + ad_hoc_sign_bundled_axe_assets echo "ℹ️ ${AXE_ARCHIVE_FLAVOR} AXe archive detected; using ad-hoc signatures for local runtime compatibility" else echo "🔏 Verifying AXe signatures..." 
@@ -284,7 +292,7 @@ if [ "$OS_NAME" = "Darwin" ]; then done < <(find "$BUNDLED_DIR/Frameworks" -name "*.framework" -type d) fi - if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ]; then + if [ "$AXE_ARCHIVE_FLAVOR" = "homebrew" ] || [ "$AXE_ARCHIVE_FLAVOR" = "universal" ] || [ "$AXE_ARCHIVE_FLAVOR" = "local-signed" ]; then echo "ℹ️ Skipping Gatekeeper assessment for ${AXE_ARCHIVE_FLAVOR} AXe archive" else echo "🛡️ Assessing AXe with Gatekeeper..." @@ -316,6 +324,27 @@ else echo "⚠️ Skipping AXe binary verification on non-macOS (detected $OS_NAME)" AXE_VERSION="unknown (verification skipped)" fi +validate_axe_version_metadata() { + if [ "$AXE_VERSION" = "unknown (verification skipped)" ]; then + return + fi + + if [[ "$AXE_VERSION" == *dirty* ]] && [ "${AXE_ALLOW_DIRTY_LOCAL:-0}" != "1" ]; then + echo "❌ Bundled AXe reports a dirty version: $AXE_VERSION" + echo " Rebuild AXe from a clean checkout or set AXE_ALLOW_DIRTY_LOCAL=1 for explicit local testing." 
+ exit 1 + fi + + if [ "$USE_LOCAL_AXE" = false ]; then + if [ "$AXE_VERSION" != "$PINNED_AXE_VERSION" ] && [ "$AXE_VERSION" != "v$PINNED_AXE_VERSION" ]; then + echo "❌ Bundled AXe version '$AXE_VERSION' does not match pinned version '$PINNED_AXE_VERSION'" + exit 1 + fi + fi +} + +validate_axe_version_metadata + echo "📋 AXe version: $AXE_VERSION" # Clean up temp directory if it was used diff --git a/src/cli/__tests__/register-tool-commands.test.ts b/src/cli/__tests__/register-tool-commands.test.ts index 8a744bacb..7bb1d6974 100644 --- a/src/cli/__tests__/register-tool-commands.test.ts +++ b/src/cli/__tests__/register-tool-commands.test.ts @@ -6,6 +6,7 @@ import type { ToolHandlerContext } from '../../rendering/types.ts'; import { DefaultToolInvoker } from '../../runtime/tool-invoker.ts'; import type { ResolvedRuntimeConfig } from '../../utils/config-store.ts'; import { registerToolCommands } from '../register-tool-commands.ts'; +import * as simulatorResolver from '../../utils/simulator-resolver.ts'; function createTool(overrides: Partial = {}): ToolDefinition { return { @@ -176,6 +177,90 @@ describe('registerToolCommands', () => { stdoutWrite.mockRestore(); }); + it('resolves configured simulatorName for CLI tools that require simulatorId', async () => { + const resolveSimulatorNameToId = vi + .spyOn(simulatorResolver, 'resolveSimulatorNameToId') + .mockResolvedValue({ + success: true, + simulatorId: 'SIM-RESOLVED', + simulatorName: 'iPhone 17 Pro', + }); + const invokeDirect = vi + .spyOn(DefaultToolInvoker.prototype, 'invokeDirect') + .mockResolvedValue(undefined); + const stdoutWrite = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + + const tool = createTool({ + cliSchema: { + simulatorId: z.string().describe('Simulator ID'), + }, + mcpSchema: { + simulatorId: z.string().describe('Simulator ID'), + }, + }); + const app = createApp(createCatalog([tool]), { + ...baseRuntimeConfig, + sessionDefaults: { + simulatorName: 'iPhone 17 Pro', + }, + 
sessionDefaultsProfiles: undefined, + activeSessionDefaultsProfile: undefined, + }); + + await expect(app.parseAsync(['simulator', 'run-tool'])).resolves.toBeDefined(); + + expect(resolveSimulatorNameToId).toHaveBeenCalledWith(expect.any(Function), 'iPhone 17 Pro'); + expect(invokeDirect).toHaveBeenCalledWith( + tool, + { + simulatorId: 'SIM-RESOLVED', + }, + expect.any(Object), + ); + + stdoutWrite.mockRestore(); + }); + + it('does not synthesize simulatorId for tools that already accept simulatorName', async () => { + const resolveSimulatorNameToId = vi.spyOn(simulatorResolver, 'resolveSimulatorNameToId'); + const invokeDirect = vi + .spyOn(DefaultToolInvoker.prototype, 'invokeDirect') + .mockResolvedValue(undefined); + const stdoutWrite = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + + const tool = createTool({ + cliSchema: { + simulatorId: z.string().optional().describe('Simulator ID'), + simulatorName: z.string().optional().describe('Simulator name'), + }, + mcpSchema: { + simulatorId: z.string().optional().describe('Simulator ID'), + simulatorName: z.string().optional().describe('Simulator name'), + }, + }); + const app = createApp(createCatalog([tool]), { + ...baseRuntimeConfig, + sessionDefaults: { + simulatorName: 'iPhone 17 Pro', + }, + sessionDefaultsProfiles: undefined, + activeSessionDefaultsProfile: undefined, + }); + + await expect(app.parseAsync(['simulator', 'run-tool'])).resolves.toBeDefined(); + + expect(resolveSimulatorNameToId).not.toHaveBeenCalled(); + expect(invokeDirect).toHaveBeenCalledWith( + tool, + { + simulatorName: 'iPhone 17 Pro', + }, + expect.any(Object), + ); + + stdoutWrite.mockRestore(); + }); + it('keeps the normal missing-argument error when no hydrated default exists', async () => { const consoleError = vi.spyOn(console, 'error').mockImplementation(() => {}); @@ -374,6 +459,97 @@ describe('registerToolCommands', () => { stdoutWrite.mockRestore(); }); + it('parses comma-separated numeric array args', async 
() => { + const invokeDirect = vi + .spyOn(DefaultToolInvoker.prototype, 'invokeDirect') + .mockResolvedValue(undefined); + const stdoutWrite = vi.spyOn(process.stdout, 'write').mockImplementation(() => true); + + const tool = createTool({ + cliSchema: { + workspacePath: z.string().describe('Workspace path'), + keyCodes: z.array(z.number()).describe('Key codes'), + }, + mcpSchema: { + workspacePath: z.string().describe('Workspace path'), + keyCodes: z.array(z.number()).describe('Key codes'), + }, + }); + const app = createApp(createCatalog([tool]), { + ...baseRuntimeConfig, + sessionDefaults: undefined, + sessionDefaultsProfiles: undefined, + activeSessionDefaultsProfile: undefined, + }); + + await expect( + app.parseAsync([ + 'simulator', + 'run-tool', + '--workspace-path', + 'App.xcworkspace', + '--key-codes', + '23,18,14', + ]), + ).resolves.toBeDefined(); + + expect(invokeDirect).toHaveBeenCalledWith( + tool, + { + workspacePath: 'App.xcworkspace', + keyCodes: [23, 18, 14], + }, + expect.any(Object), + ); + + stdoutWrite.mockRestore(); + }); + + it('honors --style minimal by hiding next steps', async () => { + vi.spyOn(DefaultToolInvoker.prototype, 'invokeDirect').mockImplementation( + async (_tool, _args, opts) => { + opts.renderSession?.setStructuredOutput?.({ + schema: 'xcodebuildmcp.output.app-path', + schemaVersion: '1', + result: { + kind: 'app-path', + didError: false, + error: null, + artifacts: { appPath: '/tmp/MyApp.app' }, + }, + }); + opts.renderSession?.setNextSteps?.( + [ + { + label: 'Run again', + tool: 'run_tool', + workflow: 'simulator', + cliTool: 'run-tool', + }, + ], + 'cli', + ); + }, + ); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool(); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--style', 'minimal']), + ).resolves.toBeDefined(); + 
+ const output = stdoutChunks.join(''); + expect(output).toContain('Get App Path'); + expect(output).not.toContain('Next steps:'); + expect(output).not.toContain('Run again'); + }); + it('applies --file-path-render-style to text output without forwarding it to tool args', async () => { vi.spyOn(DefaultToolInvoker.prototype, 'invokeDirect').mockImplementation( async (tool, args, opts) => { @@ -500,6 +676,376 @@ describe('registerToolCommands', () => { ); }); + it('writes compact rs/1 capture JSON for runtime snapshots by default', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'San Francisco', + value: 'selected', + identifier: 'weather.locationButton', + frame: { x: 12, y: 81.33, width: 178, height: 33.33 }, + actions: ['tap', 'longPress', 'touch'], + }, + { + ref: 'e3', + role: 'button', + label: 'Sheet Grabber', + value: 'Half screen', + frame: { x: 150, y: 10, width: 80, height: 20 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, + { action: 'tap', elementRef: 'e2', label: 'San Francisco' }, + { action: 'tap', elementRef: 'e3', label: 'Sheet 
Grabber' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + expect(stdoutChunks.join('')).toBe( + `${JSON.stringify( + { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + rs: '1', + screenHash: 'screen-hash', + seq: 1, + count: 3, + targets: ['e2|tap|button|San Francisco|selected|weather.locationButton'], + scroll: ['e1|swipe|application|Weather||'], + udid: 'SIMULATOR-1', + }, + }, + }, + null, + 2, + )}\n`, + ); + }); + + it('orders destructive controls after useful targets in compact JSON', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Remove', + identifier: 'trash', + frame: { x: 300, y: 180, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e2', + role: 'button', + label: 'Portland, 1:24 PM · Light Rain', + frame: { x: 20, y: 140, width: 300, height: 80 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e1', label: 'Remove' }, + { action: 'tap', elementRef: 'e2', label: 
'Portland, 1:24 PM · Light Rain' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + const output = JSON.parse(stdoutChunks.join('')) as { + data: { capture: { targets: string[] } }; + }; + expect(output.data.capture.targets).toEqual([ + 'e2|tap|button|Portland, 1:24 PM · Light Rain||', + 'e1|tap|button|Remove||trash', + ]); + }); + + it('orders unselected segmented controls before already-selected controls in compact JSON', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e9', + role: 'button', + label: '°F', + value: 'selected', + frame: { x: 20, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e10', + role: 'button', + label: '°C', + value: 'not selected', + frame: { x: 100, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e9', label: '°F' }, + { action: 'tap', elementRef: 'e10', label: '°C' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + const output = 
JSON.parse(stdoutChunks.join('')) as { + data: { capture: { targets: string[] } }; + }; + expect(output.data.capture.targets).toEqual([ + 'e10|tap|button|°C|not selected|', + 'e9|tap|button|°F|selected|', + ]); + }); + + it('writes compact wait matches with no primary action for static text', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'textContains', + matches: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'longPress', elementRef: 'e11', label: 'No matches' }, + { action: 'touch', elementRef: 'e11', label: 'No matches' }, + ], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json']), + ).resolves.toBeDefined(); + + const output = JSON.parse(stdoutChunks.join('')) as { + data: { waitMatch: { matches: string[] } }; + }; + expect(output.data.waitMatch.matches).toEqual(['e11|none|text|No matches||']); + }); + + it('writes the full runtime snapshot envelope for verbose JSON 
output', async () => { + mockInvokeDirectThroughHandler(); + const stdoutChunks: string[] = []; + vi.spyOn(process.stdout, 'write').mockImplementation((chunk) => { + stdoutChunks.push(String(chunk)); + return true; + }); + + const tool = createTool({ + handler: vi.fn(async (_args, ctx) => { + if (ctx) { + ctx.structuredOutput = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + ], + actions: [{ action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }], + }, + }, + }; + } + }) as ToolDefinition['handler'], + }); + const app = createApp(createCatalog([tool])); + + await expect( + app.parseAsync(['simulator', 'run-tool', '--output', 'json', '--verbose']), + ).resolves.toBeDefined(); + + const output = JSON.parse(stdoutChunks.join('')) as { schema: string; data: unknown }; + expect(output.schema).toBe('xcodebuildmcp.output.capture-result'); + expect(output.data).toEqual( + expect.objectContaining({ + capture: expect.objectContaining({ + type: 'runtime-snapshot', + elements: [expect.objectContaining({ ref: 'e1', actions: ['swipeWithin'] })], + }), + }), + ); + }); + it('writes one NDJSON line per domain fragment for jsonl output and omits the final envelope', async () => { mockInvokeDirectThroughHandler(); const stdoutChunks: string[] = []; diff --git a/src/cli/__tests__/schema-to-yargs.test.ts b/src/cli/__tests__/schema-to-yargs.test.ts index 014868d80..371191415 100644 --- a/src/cli/__tests__/schema-to-yargs.test.ts +++ b/src/cli/__tests__/schema-to-yargs.test.ts 
@@ -25,4 +25,18 @@ describe('schemaToYargsOptions', () => { expect(options.get('workspace-path')?.demandOption).toBe(false); }); + + it('coerces comma-separated numeric array flags', () => { + const options = schemaToYargsOptions({ + keyCodes: z.array(z.number()), + }); + + const coerce = options.get('key-codes')?.coerce; + + expect(typeof coerce).toBe('function'); + expect(coerce?.('23,18,14')).toEqual([23, 18, 14]); + expect(coerce?.('23, 18, 14')).toEqual([23, 18, 14]); + expect(coerce?.(['23', '18,14'])).toEqual([23, 18, 14]); + expect(coerce?.('23,')).toEqual([23, Number.NaN]); + }); }); diff --git a/src/cli/register-tool-commands.ts b/src/cli/register-tool-commands.ts index 4c8d6107a..450567f7c 100644 --- a/src/cli/register-tool-commands.ts +++ b/src/cli/register-tool-commands.ts @@ -16,6 +16,7 @@ import { getCliSessionDefaultsForTool, isKnownCliSessionDefaultsProfile, mergeCliSessionDefaults, + resolveCliSessionDefaults, } from './session-defaults.ts'; import { createRenderSession } from '../rendering/render.ts'; import { toStructuredEnvelope } from '../utils/structured-output-envelope.ts'; @@ -25,6 +26,8 @@ import { STRUCTURED_ERROR_SCHEMA_VERSION, } from '../utils/structured-error.ts'; import { toCliJsonlEvent } from './jsonl-event.ts'; +import { resolveSimulatorNameToId } from '../utils/simulator-resolver.ts'; +import { getDefaultCommandExecutor } from '../utils/execution/index.ts'; export interface RegisterToolCommandsOptions { workspaceRoot: string; @@ -92,7 +95,10 @@ function createBufferedHandlerContext( }; } -function writeJsonOutput(handlerContext: ToolHandlerContext): boolean { +function writeJsonOutput( + handlerContext: ToolHandlerContext, + options: { verbose?: boolean } = {}, +): boolean { const structuredOutput = handlerContext.structuredOutput; const envelope = structuredOutput @@ -100,6 +106,7 @@ function writeJsonOutput(handlerContext: ToolHandlerContext): boolean { structuredOutput.result, structuredOutput.schema, 
structuredOutput.schemaVersion, + { runtimeSnapshot: options.verbose ? 'full' : 'compact' }, ) : toStructuredEnvelope( createStructuredErrorOutput({ @@ -242,12 +249,18 @@ function registerToolSubcommand( describe: 'Output format', }); + subYargs.option('verbose', { + type: 'boolean', + default: false, + describe: 'Render verbose output data when supported', + }); + // Group options for cleaner help display if (toolArgNames.length > 0) { subYargs.group(toolArgNames, 'Tool Arguments:'); } subYargs.group(['profile'], 'Session Defaults:'); - subYargs.group(['json', 'output'], 'Output Options:'); + subYargs.group(['json', 'output', 'verbose'], 'Output Options:'); // Add note about unsupported keys if any if (unsupportedKeys.length > 0) { @@ -277,7 +290,9 @@ function registerToolSubcommand( const outputFormat = (argv.output as OutputFormat) ?? 'text'; const socketPath = argv.socket as string; const logLevel = argv['log-level'] as string | undefined; + const style = argv.style as string | undefined; const filePathRenderStyle = argv.filePathRenderStyle as FilePathRenderStyle | undefined; + const verboseOutput = argv.verbose === true; if ( profileOverride && @@ -312,6 +327,7 @@ function registerToolSubcommand( 'logLevel', 'file-path-render-style', 'filePathRenderStyle', + 'verbose', '_', '$0', ]); @@ -325,6 +341,10 @@ function registerToolSubcommand( // Merge: flag args first, then JSON overrides const explicitArgs = { ...toolParams, ...jsonArgs }; + const rawDefaults = resolveCliSessionDefaults({ + runtimeConfig: opts.runtimeConfig, + profileOverride, + }); const args = mergeCliSessionDefaults({ defaults: getCliSessionDefaultsForTool({ tool, @@ -334,6 +354,24 @@ function registerToolSubcommand( explicitArgs, }); + if ( + args.simulatorId === undefined && + tool.cliSchema.simulatorId !== undefined && + tool.cliSchema.simulatorName === undefined && + typeof rawDefaults.simulatorName === 'string' + ) { + const resolvedSimulator = await resolveSimulatorNameToId( + 
getDefaultCommandExecutor(), + rawDefaults.simulatorName, + ); + if (!resolvedSimulator.success) { + console.error(`Error: ${resolvedSimulator.error}`); + process.exitCode = 1; + return; + } + args.simulatorId = resolvedSimulator.simulatorId; + } + const missingRequiredFlags = requiredFlagNames.filter((flagName) => { const camelKey = convertArgvToToolParams({ [flagName]: true }); const [toolKey] = Object.keys(camelKey); @@ -362,6 +400,7 @@ function registerToolSubcommand( interactive: outputFormat === 'text' && process.stdout.isTTY === true, runtime: 'cli', filePathRenderStyle, + includeNextSteps: style !== 'minimal', }); const writeJsonlFragment = outputFormat === 'jsonl' @@ -395,7 +434,7 @@ function registerToolSubcommand( } if (outputFormat === 'json') { - if (writeJsonOutput(handlerContext)) { + if (writeJsonOutput(handlerContext, { verbose: verboseOutput })) { process.exitCode = 1; } return; diff --git a/src/cli/schema-to-yargs.ts b/src/cli/schema-to-yargs.ts index 175068097..e68fe8cbf 100644 --- a/src/cli/schema-to-yargs.ts +++ b/src/cli/schema-to-yargs.ts @@ -7,6 +7,16 @@ export interface YargsOptionConfig extends Options { type: 'string' | 'number' | 'boolean' | 'array'; } +function coerceNumberArray(value: unknown): number[] { + const values = Array.isArray(value) ? value : [value]; + return values.flatMap((entry) => + String(entry) + .split(',') + .map((item) => item.trim()) + .map((item) => (item === '' ? 
Number.NaN : Number(item))), + ); +} + export interface ZodToYargsOptionOptions { hasHydratedDefault?: boolean; } @@ -195,9 +205,17 @@ export function zodToYargsOption( const element = getArrayElement(unwrapped); if (element) { const elemTypeName = getZodTypeName(unwrap(element)); - if (elemTypeName === 'string' || elemTypeName === 'number') { + if (elemTypeName === 'string') { return { type: 'array', describe: description, demandOption: false }; } + if (elemTypeName === 'number') { + return { + type: 'array', + describe: description, + demandOption: false, + coerce: coerceNumberArray, + }; + } } // Complex array types - use --json fallback return null; diff --git a/src/core/__tests__/structured-output-schema.test.ts b/src/core/__tests__/structured-output-schema.test.ts index af8ce4b1b..1e1fa44b1 100644 --- a/src/core/__tests__/structured-output-schema.test.ts +++ b/src/core/__tests__/structured-output-schema.test.ts @@ -150,6 +150,125 @@ describe('structured output schema bundling', () => { ).toBe(true); }); + it('accepts ui automation v2 runtime snapshots and semantic action errors', () => { + const ajv = new Ajv2020({ allErrors: true, strict: true, validateSchema: true }); + const captureValidate = ajv.compile( + getMcpOutputSchema({ schema: 'xcodebuildmcp.output.capture-result', version: '2' }), + ); + const actionValidate = ajv.compile( + getMcpOutputSchema({ schema: 'xcodebuildmcp.output.ui-action-result', version: '2' }), + ); + + expect( + captureValidate({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIM-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIM-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Continue', + frame: { x: 10, y: 20, width: 100, height: 40 }, + state: { enabled: true, 
selected: true, visible: true }, + actions: ['tap'], + }, + ], + actions: [{ action: 'tap', elementRef: 'e1', label: 'Continue' }], + }, + }, + }), + ).toBe(true); + + expect( + captureValidate({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIM-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: 'SIM-1', + screenHash: 'screen-hash', + seq: 2, + }, + }, + }), + ).toBe(true); + + expect( + captureValidate({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIM-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + rs: '1', + screenHash: 'screen-hash', + seq: 2, + unchanged: true, + udid: 'SIM-1', + }, + }, + }), + ).toBe(true); + + expect( + actionValidate({ + schema: 'xcodebuildmcp.output.ui-action-result', + schemaVersion: '2', + didError: true, + error: 'Element ref was not found in the current snapshot.', + data: { + summary: { status: 'FAILED' }, + action: { type: 'tap', elementRef: 'e404' }, + artifacts: { simulatorId: 'SIM-1' }, + uiError: { + code: 'ELEMENT_REF_NOT_FOUND', + message: 'Element ref was not found in the current snapshot.', + recoveryHint: 'Run snapshot_ui again and retry with a current elementRef.', + elementRef: 'e404', + snapshotAgeMs: 1_000, + }, + }, + }), + ).toBe(true); + + expect( + actionValidate({ + schema: 'xcodebuildmcp.output.ui-action-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + action: { type: 'batch', stepCount: 2 }, + artifacts: { simulatorId: 'SIM-1' }, + }, + }), + ).toBe(true); + }); + it('accepts xcode bridge call-result artifacts', () => { const schema = getMcpOutputSchema({ schema: 'xcodebuildmcp.output.xcode-bridge-call-result', diff --git 
a/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts b/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts index b2325eed9..f9c677b82 100644 --- a/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts +++ b/src/mcp/tools/simulator-management/__tests__/_keyboard_shortcut.test.ts @@ -183,6 +183,8 @@ describe('sendKeyboardShortcut', () => { expect(result.success).toBe(false); if (!result.success) { expect(result.error).toContain('iPhone 15 Pro'); + expect(result.error).toContain('without a device window'); + expect(result.error).toContain('retry the keyboard shortcut'); } expect(calls).toHaveLength(3); }); diff --git a/src/mcp/tools/simulator-management/_keyboard_shortcut.ts b/src/mcp/tools/simulator-management/_keyboard_shortcut.ts index a4ea377e3..1eb5c67ae 100644 --- a/src/mcp/tools/simulator-management/_keyboard_shortcut.ts +++ b/src/mcp/tools/simulator-management/_keyboard_shortcut.ts @@ -123,7 +123,7 @@ export async function sendKeyboardShortcut( if (focusResult.output.trim() === 'NO_WINDOW') { return { success: false, - error: `No Simulator window found for "${device.name}". Is the simulator window visible?`, + error: `No visible Simulator window found for "${device.name}". 
Simulator.app may be running without a device window; open the simulator device window manually, then retry the keyboard shortcut.`, }; } diff --git a/src/mcp/tools/simulator/__tests__/boot_sim.test.ts b/src/mcp/tools/simulator/__tests__/boot_sim.test.ts index 1bc852458..cce693cd3 100644 --- a/src/mcp/tools/simulator/__tests__/boot_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/boot_sim.test.ts @@ -8,6 +8,12 @@ import { sessionStore } from '../../../../utils/session-store.ts'; import { schema, handler, boot_simLogic } from '../boot_sim.ts'; import { allText, runLogic } from '../../../../test-utils/test-helpers.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + describe('boot_sim tool', () => { beforeEach(() => { sessionStore.clear(); @@ -105,6 +111,43 @@ describe('boot_sim tool', () => { expect(result.isError).toBe(true); }); + it('should resolve simulatorName before booting', async () => { + const calls: Array<{ + command: string[]; + description?: string; + allowStderr?: boolean; + }> = []; + const mockExecutor = async ( + command: string[], + description?: string, + allowStderr?: boolean, + ) => { + calls.push({ command, description, allowStderr }); + if (command.includes('list')) { + return createMockCommandResponse({ success: true, output: availableSimulatorsJson }); + } + return createMockCommandResponse({ + success: true, + output: 'Simulator booted successfully', + }); + }; + + const result = await runLogic(() => + boot_simLogic({ simulatorName: 'iPhone 17' }, mockExecutor), + ); + + expect(result.isError).toBeFalsy(); + expect(result.nextStepParams).toEqual({ + open_sim: {}, + install_app_sim: { simulatorId: 'resolved-uuid', appPath: 'PATH_TO_YOUR_APP' }, + launch_app_sim: { simulatorId: 'resolved-uuid', bundleId: 'YOUR_APP_BUNDLE_ID' }, + }); + expect(calls.map((call) => call.command)).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 
'available', '-j'], + ['xcrun', 'simctl', 'boot', 'resolved-uuid'], + ]); + }); + it('should verify command generation with mock executor', async () => { const calls: Array<{ command: string[]; diff --git a/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts index 8872f3061..43aaa9312 100644 --- a/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/install_app_sim.test.ts @@ -11,6 +11,12 @@ import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { schema, handler, install_app_simLogic } from '../install_app_sim.ts'; import { allText, runLogic } from '../../../../test-utils/test-helpers.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + describe('install_app_sim tool', () => { beforeEach(() => { sessionStore.clear(); @@ -100,6 +106,53 @@ describe('install_app_sim tool', () => { ]); }); + it('should resolve simulatorName before installing', async () => { + const executorCalls: Array> = []; + const mockExecutor: CommandExecutor = (...args) => { + executorCalls.push(args); + const command = args[0]; + if (command.includes('list')) { + return Promise.resolve( + createMockCommandResponse({ success: true, output: availableSimulatorsJson }), + ); + } + if (command[0] === 'defaults') { + return Promise.resolve( + createMockCommandResponse({ success: true, output: 'io.sentry.myapp' }), + ); + } + return Promise.resolve( + createMockCommandResponse({ success: true, output: 'App installed' }), + ); + }; + + const mockFileSystem = createMockFileSystemExecutor({ + existsSync: () => true, + }); + + const result = await runLogic(() => + install_app_simLogic( + { + simulatorName: 'iPhone 17', + appPath: '/path/to/app.app', + }, + mockExecutor, + mockFileSystem, + ), + ); + + expect(result.isError).toBeFalsy(); + 
expect(result.nextStepParams).toEqual({ + open_sim: {}, + launch_app_sim: { simulatorId: 'resolved-uuid', bundleId: 'io.sentry.myapp' }, + }); + expect(executorCalls.map((call) => call[0])).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 'available', '-j'], + ['xcrun', 'simctl', 'install', 'resolved-uuid', '/path/to/app.app'], + ['defaults', 'read', '/path/to/app.app/Info', 'CFBundleIdentifier'], + ]); + }); + it('should generate command with different simulator identifier', async () => { const executorCalls: Array> = []; const mockExecutor: CommandExecutor = (...args) => { diff --git a/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts index 583c5ca9c..3680374c0 100644 --- a/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/launch_app_sim.test.ts @@ -1,11 +1,20 @@ import { describe, it, expect, beforeEach } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor } from '../../../../test-utils/mock-executors.ts'; +import { + createMockCommandResponse, + createMockExecutor, +} from '../../../../test-utils/mock-executors.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; import { schema, handler, launch_app_simLogic, type SimulatorLauncher } from '../launch_app_sim.ts'; import type { LaunchWithLoggingResult } from '../../../../utils/simulator-steps.ts'; import { runLogic } from '../../../../test-utils/test-helpers.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + function createMockLauncher(overrides?: Partial): SimulatorLauncher { return async (_uuid, _bundleId, _executor, _opts?) 
=> ({ success: true, @@ -143,6 +152,44 @@ describe('launch_app_sim tool', () => { expect(capturedEnv).toEqual({ STAGING_ENABLED: '1' }); }); + it('should resolve simulatorName before checking install and launching', async () => { + const executorCalls: string[][] = []; + const installCheckExecutor = async (command: string[]) => { + executorCalls.push(command); + if (command.includes('list')) { + return createMockCommandResponse({ success: true, output: availableSimulatorsJson }); + } + return createMockCommandResponse({ success: true, output: '/path/to/app/container' }); + }; + let launchedUuid: string | undefined; + const trackingLauncher: SimulatorLauncher = async (uuid, _bundleId, _executor, _opts?) => { + launchedUuid = uuid; + return { success: true, processId: 12345, logFilePath: '/tmp/test.log' }; + }; + + const result = await runLogic(() => + launch_app_simLogic( + { + simulatorName: 'iPhone 17', + bundleId: 'io.sentry.testapp', + }, + installCheckExecutor, + trackingLauncher, + ), + ); + + expect(result.isError).toBeFalsy(); + expect(launchedUuid).toBe('resolved-uuid'); + expect(executorCalls).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 'available', '-j'], + ['xcrun', 'simctl', 'get_app_container', 'resolved-uuid', 'io.sentry.testapp', 'app'], + ]); + expect(result.nextStepParams).toEqual({ + open_sim: {}, + stop_app_sim: { simulatorId: 'resolved-uuid', bundleId: 'io.sentry.testapp' }, + }); + }); + it('should display friendly name when simulatorName is provided alongside resolved simulatorId', async () => { const installCheckExecutor = async () => ({ success: true, diff --git a/src/mcp/tools/simulator/__tests__/screenshot.test.ts b/src/mcp/tools/simulator/__tests__/screenshot.test.ts index f32e80ca1..0937243ac 100644 --- a/src/mcp/tools/simulator/__tests__/screenshot.test.ts +++ b/src/mcp/tools/simulator/__tests__/screenshot.test.ts @@ -13,6 +13,15 @@ import { schema, handler, screenshotLogic } from '../../ui-automation/screenshot import { allText, 
runLogic } from '../../../../test-utils/test-helpers.ts'; describe('screenshot plugin', () => { + const bootedDeviceListJson = JSON.stringify({ + devices: { + 'com.apple.CoreSimulator.SimRuntime.iOS-17-2': [ + { udid: 'test-uuid', name: 'iPhone 15 Pro', state: 'Booted' }, + { udid: 'another-uuid', name: 'iPhone 15', state: 'Booted' }, + ], + }, + }); + beforeEach(() => { sessionStore.clear(); }); @@ -89,7 +98,11 @@ describe('screenshot plugin', () => { expect(capturedCommands).toHaveLength(5); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[0][0]).toBe('xcrun'); + expect(capturedCommands[0][1]).toBe('simctl'); + expect(capturedCommands[0][2]).toBe('list'); + + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -98,10 +111,6 @@ describe('screenshot plugin', () => { '/tmp/screenshot_mock-uuid-123.png', ]); - expect(capturedCommands[1][0]).toBe('xcrun'); - expect(capturedCommands[1][1]).toBe('simctl'); - expect(capturedCommands[1][2]).toBe('list'); - expect(capturedCommands[2][0]).toBe('swift'); expect(capturedCommands[2][1]).toBe('-e'); @@ -168,7 +177,11 @@ describe('screenshot plugin', () => { expect(capturedCommands).toHaveLength(5); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[0][0]).toBe('xcrun'); + expect(capturedCommands[0][1]).toBe('simctl'); + expect(capturedCommands[0][2]).toBe('list'); + + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -177,10 +190,6 @@ describe('screenshot plugin', () => { '/tmp/screenshot_different-uuid-456.png', ]); - expect(capturedCommands[1][0]).toBe('xcrun'); - expect(capturedCommands[1][1]).toBe('simctl'); - expect(capturedCommands[1][2]).toBe('list'); - expect(capturedCommands[2][0]).toBe('swift'); expect(capturedCommands[2][1]).toBe('-e'); @@ -234,21 +243,21 @@ describe('screenshot plugin', () => { ), ); - // Should execute all commands in sequence: screenshot, list devices, orientation detection, optimization, dimensions + // Should execute all commands in 
sequence: list devices, screenshot, orientation detection, optimization, dimensions expect(capturedCommands).toHaveLength(5); - const firstCommand = capturedCommands[0]; - expect(firstCommand).toHaveLength(6); - expect(firstCommand[0]).toBe('xcrun'); - expect(firstCommand[1]).toBe('simctl'); - expect(firstCommand[2]).toBe('io'); - expect(firstCommand[3]).toBe('test-uuid'); - expect(firstCommand[4]).toBe('screenshot'); - expect(firstCommand[5]).toMatch(/\/.*\/screenshot_.*\.png/); + expect(capturedCommands[0][0]).toBe('xcrun'); + expect(capturedCommands[0][1]).toBe('simctl'); + expect(capturedCommands[0][2]).toBe('list'); - expect(capturedCommands[1][0]).toBe('xcrun'); - expect(capturedCommands[1][1]).toBe('simctl'); - expect(capturedCommands[1][2]).toBe('list'); + const screenshotCommand = capturedCommands[1]; + expect(screenshotCommand).toHaveLength(6); + expect(screenshotCommand[0]).toBe('xcrun'); + expect(screenshotCommand[1]).toBe('simctl'); + expect(screenshotCommand[2]).toBe('io'); + expect(screenshotCommand[3]).toBe('test-uuid'); + expect(screenshotCommand[4]).toBe('screenshot'); + expect(screenshotCommand[5]).toMatch(/\/.*\/screenshot_.*\.png/); expect(capturedCommands[2][0]).toBe('swift'); expect(capturedCommands[2][1]).toBe('-e'); @@ -267,7 +276,9 @@ describe('screenshot plugin', () => { const mockImageBuffer = Buffer.from('fake-image-data'); const mockExecutor = createCommandMatchingMockExecutor({ - 'xcrun simctl': { success: true, output: 'Screenshot saved' }, + 'xcrun simctl list devices': { success: true, output: bootedDeviceListJson }, + 'xcrun simctl io': { success: true, output: 'Screenshot saved' }, + 'swift -e': { success: true, output: '' }, sips: { success: true, output: 'Image optimized' }, }); @@ -320,11 +331,21 @@ describe('screenshot plugin', () => { }); it('should handle command failure', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'Command failed', - }); + const mockExecutor: 
CommandExecutor = async (command) => { + const cmdStr = command.join(' '); + if (cmdStr.includes('simctl list devices')) { + return { + success: true, + output: bootedDeviceListJson, + error: undefined, + process: mockProcess, + }; + } + if (cmdStr.includes('simctl io')) { + return { success: false, output: '', error: 'Command failed', process: mockProcess }; + } + return { success: true, output: '', error: undefined, process: mockProcess }; + }; const mockPathDeps = { tmpdir: () => '/tmp', @@ -354,10 +375,11 @@ describe('screenshot plugin', () => { }); it('should handle file read failure', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: '', - error: undefined, + const mockExecutor = createCommandMatchingMockExecutor({ + 'xcrun simctl list devices': { success: true, output: bootedDeviceListJson }, + 'xcrun simctl io': { success: true, output: 'Screenshot saved' }, + 'swift -e': { success: true, output: '' }, + sips: { success: true, output: 'Image optimized' }, }); const mockFileSystemExecutor = createMockFileSystemExecutor({ @@ -446,18 +468,18 @@ describe('screenshot plugin', () => { expect(capturedArgs).toHaveLength(5); - expect(capturedArgs[0]).toEqual([ + expect(capturedArgs[0][0][0]).toBe('xcrun'); + expect(capturedArgs[0][0][1]).toBe('simctl'); + expect(capturedArgs[0][0][2]).toBe('list'); + expect(capturedArgs[0][1]).toBe('[Screenshot]: list devices'); + expect(capturedArgs[0][2]).toBe(false); + + expect(capturedArgs[1]).toEqual([ ['xcrun', 'simctl', 'io', 'test-uuid', 'screenshot', '/tmp/screenshot_mock-uuid-123.png'], '[Screenshot]: screenshot', false, ]); - expect(capturedArgs[1][0][0]).toBe('xcrun'); - expect(capturedArgs[1][0][1]).toBe('simctl'); - expect(capturedArgs[1][0][2]).toBe('list'); - expect(capturedArgs[1][1]).toBe('[Screenshot]: list devices'); - expect(capturedArgs[1][2]).toBe(false); - expect(capturedArgs[2][0][0]).toBe('swift'); expect(capturedArgs[2][0][1]).toBe('-e'); 
expect(capturedArgs[2][1]).toBe('[Screenshot]: detect orientation'); @@ -578,10 +600,11 @@ describe('screenshot plugin', () => { }); it('should handle file read error with fileSystemExecutor', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: '', - error: undefined, + const mockExecutor = createCommandMatchingMockExecutor({ + 'xcrun simctl list devices': { success: true, output: bootedDeviceListJson }, + 'xcrun simctl io': { success: true, output: 'Screenshot saved' }, + 'swift -e': { success: true, output: '' }, + sips: { success: true, output: 'Image optimized' }, }); const mockFileSystemExecutor = createMockFileSystemExecutor({ diff --git a/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts b/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts index c89893617..1a93677c9 100644 --- a/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts +++ b/src/mcp/tools/simulator/__tests__/stop_app_sim.test.ts @@ -22,6 +22,12 @@ import * as path from 'node:path'; import type { ChildProcess } from 'node:child_process'; import { setRuntimeInstanceForTests } from '../../../../utils/runtime-instance.ts'; +const availableSimulatorsJson = JSON.stringify({ + devices: { + 'iOS 26.0': [{ name: 'iPhone 17', udid: 'resolved-uuid', isAvailable: true }], + }, +}); + function createTrackedChild(options?: { pid?: number; killImplementation?: (signal?: NodeJS.Signals | number) => boolean; @@ -191,6 +197,33 @@ describe('stop_app_sim tool', () => { expect(text).not.toContain('Tracked OSLog sessions cleaned up'); }); + it('should resolve simulatorName before stopping', async () => { + const calls: string[][] = []; + const mockExecutor: CommandExecutor = async (command) => { + calls.push(command); + if (command.includes('list')) { + return createMockCommandResponse({ success: true, output: availableSimulatorsJson }); + } + return createMockCommandResponse({ success: true, output: '' }); + }; + + const result = await runLogic(() => + stop_app_simLogic( + 
{ + simulatorName: 'iPhone 17', + bundleId: 'io.sentry.App', + }, + mockExecutor, + ), + ); + + expect(result.isError).toBeFalsy(); + expect(calls).toEqual([ + ['xcrun', 'simctl', 'list', 'devices', 'available', '-j'], + ['xcrun', 'simctl', 'terminate', 'resolved-uuid', 'io.sentry.App'], + ]); + }); + it('should display friendly name when simulatorName is provided alongside resolved simulatorId', async () => { const mockExecutor = createMockExecutor({ success: true, output: '' }); diff --git a/src/mcp/tools/simulator/boot_sim.ts b/src/mcp/tools/simulator/boot_sim.ts index eb6eac273..9ac28af60 100644 --- a/src/mcp/tools/simulator/boot_sim.ts +++ b/src/mcp/tools/simulator/boot_sim.ts @@ -11,6 +11,7 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { toErrorMessage } from '../../../utils/errors.ts'; import { createBasicDiagnostics } from '../../../utils/diagnostics.ts'; @@ -30,11 +31,12 @@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), }); type BootSimParams = z.infer; +type ResolvedBootSimParams = BootSimParams & { simulatorId: string }; type BootSimResult = SimulatorActionResultDomainResult; const publicSchemaObject = z.strictObject( @@ -45,7 +47,7 @@ const publicSchemaObject = z.strictObject( ); function createBootSimResult(params: { - simulatorId: string; + simulatorId?: string; didError: boolean; error?: string; diagnosticMessage?: string; @@ -63,9 +65,13 @@ function createBootSimResult(params: { ...(params.diagnosticMessage ? { diagnostics: createBasicDiagnostics({ errors: [params.diagnosticMessage] }) } : {}), - artifacts: { - simulatorId: params.simulatorId, - }, + ...(params.simulatorId + ? 
{ + artifacts: { + simulatorId: params.simulatorId, + }, + } + : {}), }; } @@ -79,7 +85,7 @@ function setStructuredOutput(ctx: ToolHandlerContext, result: BootSimResult): vo export function createBootSimExecutor( executor: CommandExecutor, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { try { const result = await executor( @@ -118,11 +124,28 @@ export async function boot_simLogic( params: BootSimParams, executor: CommandExecutor, ): Promise { - log('info', `Starting xcrun simctl boot request for simulator ${params.simulatorId}`); - const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = createBootSimResult({ + didError: true, + error: 'Boot simulator operation failed.', + diagnosticMessage: `Failed to resolve simulator: ${simulatorResult.error ?? 'No simulator UUID returned'}`, + }); + setStructuredOutput(ctx, result); + log('error', `Error during boot simulator operation: ${result.error ?? 
'Unknown error'}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedBootSimParams = { ...params, simulatorId: simulatorResult.uuid }; + log('info', `Starting xcrun simctl boot request for simulator ${resolvedParams.simulatorId}`); + const executeBootSim = createBootSimExecutor(executor); - const result = await executeBootSim(params); + const result = await executeBootSim(resolvedParams); setStructuredOutput(ctx, result); if (result.didError) { @@ -132,8 +155,8 @@ export async function boot_simLogic( ctx.nextStepParams = { open_sim: {}, - install_app_sim: { simulatorId: params.simulatorId, appPath: 'PATH_TO_YOUR_APP' }, - launch_app_sim: { simulatorId: params.simulatorId, bundleId: 'YOUR_APP_BUNDLE_ID' }, + install_app_sim: { simulatorId: resolvedParams.simulatorId, appPath: 'PATH_TO_YOUR_APP' }, + launch_app_sim: { simulatorId: resolvedParams.simulatorId, bundleId: 'YOUR_APP_BUNDLE_ID' }, }; } diff --git a/src/mcp/tools/simulator/install_app_sim.ts b/src/mcp/tools/simulator/install_app_sim.ts index 9faa8ba6a..029696d21 100644 --- a/src/mcp/tools/simulator/install_app_sim.ts +++ b/src/mcp/tools/simulator/install_app_sim.ts @@ -11,6 +11,7 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { toErrorMessage } from '../../../utils/errors.ts'; import { installAppOnSimulator } from '../../../utils/simulator-steps.ts'; import { @@ -36,12 +37,13 @@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), appPath: z.string(), }); type InstallAppSimParams = z.infer; +type ResolvedInstallAppSimParams = InstallAppSimParams & { simulatorId: string }; const publicSchemaObject = z.strictObject( baseSchemaObject.omit({ @@ -56,8 +58,27 @@ export async 
function install_app_simLogic( fileSystem?: FileSystemExecutor, ): Promise { const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = buildInstallFailure( + { appPath: params.appPath }, + `Failed to resolve simulator: ${simulatorResult.error ?? 'No simulator UUID returned'}`, + ); + setInstallResultStructuredOutput(ctx, result); + log('error', `Error during install app in simulator operation: ${result.error}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedInstallAppSimParams = { + ...params, + simulatorId: simulatorResult.uuid, + }; const executeInstallAppSim = createInstallAppSimExecutor(executor, fileSystem); - const result = await executeInstallAppSim(params); + const result = await executeInstallAppSim(resolvedParams); setInstallResultStructuredOutput(ctx, result); @@ -73,7 +94,7 @@ export async function install_app_simLogic( ctx.nextStepParams = { open_sim: {}, launch_app_sim: { - simulatorId: params.simulatorId, + simulatorId: resolvedParams.simulatorId, bundleId: bundleId || 'YOUR_APP_BUNDLE_ID', }, }; @@ -103,7 +124,7 @@ async function extractBundleId( export function createInstallAppSimExecutor( executor: CommandExecutor, fileSystem?: FileSystemExecutor, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { const artifacts = { simulatorId: params.simulatorId, appPath: params.appPath }; diff --git a/src/mcp/tools/simulator/launch_app_sim.ts b/src/mcp/tools/simulator/launch_app_sim.ts index 065958b4b..0299bc75b 100644 --- a/src/mcp/tools/simulator/launch_app_sim.ts +++ b/src/mcp/tools/simulator/launch_app_sim.ts @@ -14,6 +14,7 @@ import { launchSimulatorAppWithLogging, type LaunchWithLoggingResult, } from '../../../utils/simulator-steps.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { 
toErrorMessage } from '../../../utils/errors.ts'; import { buildLaunchFailure, @@ -49,7 +50,7 @@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), bundleId: z.string(), launchArgs: z.array(z.string()).optional(), @@ -57,6 +58,7 @@ const internalSchemaObject = z.object({ }); export type LaunchAppSimParams = z.infer; +type ResolvedLaunchAppSimParams = LaunchAppSimParams & { simulatorId: string }; type LaunchAppSimResult = LaunchResultDomainResult; export type SimulatorLauncher = typeof launchSimulatorAppWithLogging; @@ -67,8 +69,27 @@ export async function launch_app_simLogic( launcher: SimulatorLauncher = launchSimulatorAppWithLogging, ): Promise { const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = buildLaunchFailure( + { bundleId: params.bundleId }, + `Failed to resolve simulator: ${simulatorResult.error ?? 
'No simulator UUID returned'}`, + ); + setLaunchResultStructuredOutput(ctx, result); + log('error', `Error during launch app in simulator operation: ${result.error}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedLaunchAppSimParams = { + ...params, + simulatorId: simulatorResult.uuid, + }; const executeLaunchAppSim = createLaunchAppSimExecutor(executor, launcher); - const result = await executeLaunchAppSim(params); + const result = await executeLaunchAppSim(resolvedParams); setLaunchResultStructuredOutput(ctx, result); @@ -82,12 +103,12 @@ export async function launch_app_simLogic( ctx.nextStepParams = { open_sim: {}, - stop_app_sim: { simulatorId: params.simulatorId, bundleId: params.bundleId }, + stop_app_sim: { simulatorId: resolvedParams.simulatorId, bundleId: params.bundleId }, }; } function buildSuccessArtifacts( - params: LaunchAppSimParams, + params: ResolvedLaunchAppSimParams, launchResult: LaunchWithLoggingResult, ): LaunchResultArtifacts { return { @@ -102,7 +123,7 @@ function buildSuccessArtifacts( export function createLaunchAppSimExecutor( executor: CommandExecutor, launcher: SimulatorLauncher = launchSimulatorAppWithLogging, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { log('info', `Starting xcrun simctl launch request for simulator ${params.simulatorId}`); diff --git a/src/mcp/tools/simulator/stop_app_sim.ts b/src/mcp/tools/simulator/stop_app_sim.ts index 15c57c43c..0f2f06bf7 100644 --- a/src/mcp/tools/simulator/stop_app_sim.ts +++ b/src/mcp/tools/simulator/stop_app_sim.ts @@ -10,6 +10,7 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { determineSimulatorUuid } from '../../../utils/simulator-utils.ts'; import { toErrorMessage } from '../../../utils/errors.ts'; import { stopSimulatorLaunchOsLogSessionsForApp } from '../../../utils/log-capture/index.ts'; import { @@ -35,17 +36,18 
@@ const baseSchemaObject = z.object({ }); const internalSchemaObject = z.object({ - simulatorId: z.string(), + simulatorId: z.string().optional(), simulatorName: z.string().optional(), bundleId: z.string(), }); export type StopAppSimParams = z.infer; +type ResolvedStopAppSimParams = StopAppSimParams & { simulatorId: string }; type StopAppSimResult = StopResultDomainResult; export function createStopAppSimExecutor( executor: CommandExecutor, -): NonStreamingExecutor { +): NonStreamingExecutor { return async (params) => { const simulatorId = params.simulatorId; const artifacts = { simulatorId, bundleId: params.bundleId }; @@ -92,13 +94,32 @@ export async function stop_app_simLogic( params: StopAppSimParams, executor: CommandExecutor, ): Promise { - const simulatorId = params.simulatorId; + const ctx = getHandlerContext(); + const simulatorResult = await determineSimulatorUuid(params, executor); + if (simulatorResult.error || !simulatorResult.uuid) { + const result = buildStopFailure( + { bundleId: params.bundleId }, + `Failed to resolve simulator: ${simulatorResult.error ?? 
'No simulator UUID returned'}`, + ); + setStopResultStructuredOutput(ctx, result); + log('error', `Error stopping app in simulator: ${result.error}`); + return; + } + + if (simulatorResult.warning) { + log('warn', simulatorResult.warning); + } + + const resolvedParams: ResolvedStopAppSimParams = { + ...params, + simulatorId: simulatorResult.uuid, + }; + const simulatorId = resolvedParams.simulatorId; log('info', `Stopping app ${params.bundleId} in simulator ${simulatorId}`); - const ctx = getHandlerContext(); const executeStopAppSim = createStopAppSimExecutor(executor); - const result = await executeStopAppSim(params); + const result = await executeStopAppSim(resolvedParams); setStopResultStructuredOutput(ctx, result); if (result.didError) { diff --git a/src/mcp/tools/ui-automation/__tests__/batch.test.ts b/src/mcp/tools/ui-automation/__tests__/batch.test.ts new file mode 100644 index 000000000..4f1850a92 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/batch.test.ts @@ -0,0 +1,211 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest'; +import * as z from 'zod'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; +import { DebuggerManager } from '../../../../utils/debugger/debugger-manager.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { batchLogic, createBatchExecutor, handler, schema } from '../batch.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runBatch( + params: Parameters[0], + executor = createTrackingExecutor().executor, + axeHelpers = createMockAxeHelpers(), +): Promise { + const { ctx, run } = 
createMockToolHandlerContext(); + await run(() => batchLogic(params, executor, axeHelpers)); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} + +describe('Batch UI Automation Tool', () => { + beforeEach(() => { + sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); + }); + + describe('Schema Validation', () => { + it('exposes batch steps and AXe batch options', () => { + expect(typeof handler).toBe('function'); + expect(schema).toHaveProperty('steps'); + expect(schema).toHaveProperty('axCache'); + expect(schema).toHaveProperty('tapStyle'); + + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ steps: ['tap --id login'] }).success).toBe(true); + expect( + schemaObject.safeParse({ + steps: ['tap --id login', 'type user@example.com'], + axCache: 'perBatch', + typeSubmission: 'chunked', + typeChunkSize: 8, + tapStyle: 'automatic', + continueOnError: true, + waitTimeout: 2, + pollInterval: 0.25, + }).success, + ).toBe(true); + expect(schemaObject.safeParse({ steps: [] }).success).toBe(false); + expect(schemaObject.safeParse({ steps: [''] }).success).toBe(false); + expect(schemaObject.safeParse({ steps: ['tap --id login'], pollInterval: 0 }).success).toBe( + false, + ); + }); + }); + + describe('Command Generation', () => { + it('builds repeated AXe --step arguments', async () => { + const { calls, executor } = createTrackingExecutor(); + + const result = await runBatch( + { + simulatorId, + steps: ['tap --id username-field', 'type user@example.com'], + }, + executor, + ); + + expect(result).toMatchObject({ + didError: false, + action: { type: 'batch', stepCount: 2 }, + }); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'batch', + '--step', + 'tap --id username-field', + '--step', + 'type user@example.com', + '--udid', + simulatorId, + ], + ]); + }); + + it('passes AXe batch options through unchanged', async () => { + const { 
calls, executor } = createTrackingExecutor(); + + await runBatch( + { + simulatorId, + steps: ['tap --id login'], + axCache: 'perStep', + typeSubmission: 'composite', + typeChunkSize: 4, + tapStyle: 'physical', + continueOnError: true, + waitTimeout: 3, + pollInterval: 0.5, + }, + executor, + ); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'batch', + '--step', + 'tap --id login', + '--ax-cache', + 'perStep', + '--type-submission', + 'composite', + '--type-chunk-size', + '4', + '--tap-style', + 'physical', + '--continue-on-error', + '--wait-timeout', + '3', + '--poll-interval', + '0.5', + '--udid', + simulatorId, + ]); + }); + }); + + describe('Runtime snapshot invalidation', () => { + it('clears the cached runtime snapshot after a successful batch', async () => { + recordSnapshot([createNode()]); + + const result = await runBatch({ simulatorId, steps: ['tap --id login'] }); + + expect(result.didError).toBe(false); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + + it('clears the cached runtime snapshot when AXe runs and reports batch failure', async () => { + recordSnapshot([createNode()]); + + const result = await runBatch( + { simulatorId, steps: ['type Secret123'] }, + createFailingExecutor('step failed: type Secret123'), + ); + + expect(result.didError).toBe(true); + expect(JSON.stringify(result)).not.toContain('Secret123'); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + + it('preserves the cached runtime snapshot when AXe is unavailable before execution', async () => { + recordSnapshot([createNode()]); + const { executor } = createTrackingExecutor(); + + const result = await runBatch( + { simulatorId, steps: ['tap --id login'] }, + executor, + createMockAxeHelpers({ getAxePathReturn: null }), + ); + + expect(result.didError).toBe(true); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + + it('preserves the cached runtime snapshot when the debugger guard blocks before AXe runs', async () => { + 
recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); + const debuggerManager = new DebuggerManager(); + vi.spyOn(debuggerManager, 'findSessionForSimulator').mockReturnValue({ + id: 'debug-session-1', + backend: 'dap', + simulatorId, + pid: 1234, + createdAt: 0, + lastUsedAt: 0, + }); + vi.spyOn(debuggerManager, 'getExecutionState').mockResolvedValue({ + status: 'stopped', + reason: 'breakpoint', + }); + const executeBatch = createBatchExecutor(executor, createMockAxeHelpers(), debuggerManager); + + const result = await executeBatch({ simulatorId, steps: ['tap --id login'] }); + + expect(result.didError).toBe(true); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + }); + }); + + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await handler({ steps: ['tap --id login'] }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/button.test.ts b/src/mcp/tools/ui-automation/__tests__/button.test.ts index 8fb31aed6..2d01bd873 100644 --- a/src/mcp/tools/ui-automation/__tests__/button.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/button.test.ts @@ -1,11 +1,11 @@ -import { describe, it, expect } from 'vitest'; +import { describe, it, expect, vi } from 'vitest'; import * as z from 'zod'; import { createMockExecutor, createNoopExecutor, createMockCommandResponse, } from '../../../../test-utils/mock-executors.ts'; -import { schema, handler, buttonLogic } from '../button.ts'; +import { schema, handler, buttonLogic, createButtonExecutor } from '../button.ts'; import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; import { allText, runLogic 
} from '../../../../test-utils/test-helpers.ts'; @@ -23,6 +23,8 @@ describe('Button Plugin', () => { expect(schemaObj.safeParse({ buttonType: 'home', duration: 2.5 }).success).toBe(true); expect(schemaObj.safeParse({ buttonType: 'invalid-button' }).success).toBe(false); expect(schemaObj.safeParse({ buttonType: 'home', duration: -1 }).success).toBe(false); + expect(schemaObj.safeParse({ buttonType: 'home', duration: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ buttonType: 'home', duration: 10.1 }).success).toBe(false); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', @@ -60,6 +62,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -97,6 +101,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -135,6 +141,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -171,6 +179,8 @@ describe('Button Plugin', () => { }, trackingExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -184,6 +194,46 @@ describe('Button Plugin', () => { }); }); + describe('Executor Behavior', () => { + it('waits briefly after successful button presses so system UI transitions can settle', async () => { + vi.useFakeTimers(); + try { + const mockExecutor = createMockExecutor({ + success: true, + output: 'button press completed', + error: undefined, + process: { pid: 12345 }, + }); + + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + const executeButton = createButtonExecutor(mockExecutor, mockAxeHelpers, undefined, 500); + let settled = false; + const resultPromise = executeButton({ + simulatorId: '12345678-1234-4234-8234-123456789012', + buttonType: 'home', + }).then((result) => { + settled = true; + return result; + }); + + await vi.advanceTimersByTimeAsync(499); + expect(settled).toBe(false); + + await 
vi.advanceTimersByTimeAsync(1); + const result = await resultPromise; + + expect(settled).toBe(true); + expect(result.didError).toBe(false); + } finally { + vi.useRealTimers(); + } + }); + }); + describe('Handler Behavior (Complete Literal Returns)', () => { it('should surface session default requirement when simulatorId is missing', async () => { const result = await handler({ buttonType: 'home' }); @@ -235,7 +285,7 @@ describe('Button Plugin', () => { expect(result.isError).toBe(true); expect(allText(result)).toContain('Parameter validation failed'); - expect(allText(result)).toContain('Duration must be non-negative'); + expect(allText(result)).toContain('Duration must be greater than 0 seconds'); }); it('should return success for valid button press', async () => { @@ -259,6 +309,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -288,6 +340,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -309,6 +363,8 @@ describe('Button Plugin', () => { }, createNoopExecutor(), mockAxeHelpers, + undefined, + 0, ), ); @@ -337,6 +393,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -364,6 +422,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -391,6 +451,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); @@ -418,6 +480,8 @@ describe('Button Plugin', () => { }, mockExecutor, mockAxeHelpers, + undefined, + 0, ), ); diff --git a/src/mcp/tools/ui-automation/__tests__/gesture.test.ts b/src/mcp/tools/ui-automation/__tests__/gesture.test.ts index 48b5f5e59..23ba3c255 100644 --- a/src/mcp/tools/ui-automation/__tests__/gesture.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/gesture.test.ts @@ -37,7 +37,11 @@ describe('Gesture Plugin', () => { ).toBe(true); expect(schemaObj.safeParse({ preset: 'invalid-preset' }).success).toBe(false); 
expect(schemaObj.safeParse({ preset: 'scroll-up', screenWidth: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', screenWidth: 2001 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', screenHeight: 3001 }).success).toBe(false); expect(schemaObj.safeParse({ preset: 'scroll-up', duration: -1 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', duration: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ preset: 'scroll-up', delta: 201 }).success).toBe(false); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/__tests__/key_press.test.ts b/src/mcp/tools/ui-automation/__tests__/key_press.test.ts index 96ee71648..825b95567 100644 --- a/src/mcp/tools/ui-automation/__tests__/key_press.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/key_press.test.ts @@ -36,6 +36,8 @@ describe('Key Press Tool', () => { expect(schemaObj.safeParse({ keyCode: 'invalid' }).success).toBe(false); expect(schemaObj.safeParse({ keyCode: -1 }).success).toBe(false); expect(schemaObj.safeParse({ keyCode: 256 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCode: 40, duration: 0 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCode: 40, duration: 10.1 }).success).toBe(false); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts b/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts index badf86b65..e11593eb4 100644 --- a/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/key_sequence.test.ts @@ -29,6 +29,10 @@ describe('Key Sequence Tool', () => { expect(schemaObj.safeParse({ keyCodes: [-1] }).success).toBe(false); expect(schemaObj.safeParse({ keyCodes: [256] }).success).toBe(false); expect(schemaObj.safeParse({ keyCodes: [40], delay: -0.1 
}).success).toBe(false); + expect(schemaObj.safeParse({ keyCodes: [40], delay: 5.1 }).success).toBe(false); + expect(schemaObj.safeParse({ keyCodes: Array.from({ length: 101 }, () => 40) }).success).toBe( + false, + ); const withSimId = schemaObj.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', diff --git a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts index 6afa10ce7..3e689d9a0 100644 --- a/src/mcp/tools/ui-automation/__tests__/long_press.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/long_press.test.ts @@ -1,457 +1,191 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor, mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; import { schema, handler, long_pressLogic } from '../long_press.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic } from '../../../../test-utils/test-helpers.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runLongPress( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => long_pressLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} describe('Long Press Plugin', () 
=> { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); - describe('Export Field Validation (Literal)', () => { - it('should have handler function', () => { + describe('Schema Validation', () => { + it('exposes elementRef and duration without coordinate fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('duration'); + expect(schema).not.toHaveProperty('x'); + expect(schema).not.toHaveProperty('y'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - duration: 1500, - }).success, - ).toBe(true); - - expect( - schemaObject.safeParse({ - x: 100.5, - y: 200, - duration: 1500, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200.5, - duration: 1500, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - duration: 0, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - duration: -100, - }).success, - ).toBe(false); - - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); - }); - }); - - describe('Handler Requirements', () => { - it('should require simulatorId session default', async () => { - const result = await handler({ x: 100, y: 200, duration: 1500 }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); - }); - - it('should surface validation errors once simulator default exists', async () => { - sessionStore.setDefaults({ 
simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await handler({ x: 100, y: 200, duration: 0 }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('duration: Duration of the long press in milliseconds'); + expect(schemaObject.safeParse({ elementRef: 'e1', duration: 1500 }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', duration: 0 }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1', duration: 10_001 }).success).toBe(false); + expect(schemaObject.safeParse({ duration: 1500 }).success).toBe(false); }); }); describe('Command Generation', () => { - it('should generate correct axe command for basic long press', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('long presses the referenced element center and converts milliseconds to AXe seconds', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - trackingExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1500 }, + executor, ); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(result).toMatchObject({ + didError: false, + action: { type: 'long-press', elementRef: 'e1', durationMs: 1500 }, + }); + expect(calls).toHaveLength(1); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 
'touch', '-x', - '100', + '60', '-y', - '200', + '40', '--down', '--up', '--delay', '1.5', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for long press with different coordinates', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 50, - y: 75, - duration: 2000, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'touch', - '-x', - '50', - '-y', - '75', - '--down', - '--up', - '--delay', - '2', - '--udid', - '12345678-1234-4234-8234-123456789012', + it('uses the switch activation point for wide switch rows', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + }), ]); - }); + const { calls, executor } = createTrackingExecutor(); - it('should generate correct axe command for short duration long press', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; + await runLongPress({ simulatorId, elementRef: 'e1', duration: 1000 }, executor); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 300, - y: 400, - duration: 500, - }, - trackingExecutor, - 
mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '300', + '307', '-y', - '400', + '903', '--down', '--up', '--delay', - '0.5', + '1', '--udid', - '12345678-1234-4234-8234-123456789012', - ]); - }); - - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'long press completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/path/to/bundled/axe', - getBundledAxeEnvironment: () => ({ AXE_PATH: '/some/path' }), - }; - - await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 150, - y: 250, - duration: 3000, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', - 'touch', - '-x', - '150', - '-y', - '250', - '--down', - '--up', - '--delay', - '3', - '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should return success for valid long press execution', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'long press completed', - error: '', - }); - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, 
elementRef: 'e1', duration: 1000 }, + executor, ); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Long press at (100, 200) for 1500ms simulated successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should handle DependencyError when axe is not available', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: '', - error: undefined, - process: mockProcess, - }); - - const mockAxeHelpers = { - getAxePath: () => null, // Mock axe not found - getBundledAxeEnvironment: () => ({}), - }; + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode()], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should handle AxeError from failed command execution', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'axe command failed', - process: mockProcess, - }); - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 
1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e404', duration: 1000 }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to simulate long press at (100, 200).'); - expect(text).toContain('axe command failed'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should handle SystemError from command execution', async () => { - const mockExecutor = () => { - throw new Error('ENOENT: no such file or directory'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ role: 'AXApplication', type: 'Application' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1000 }, + executor, ); - expect(result.isError).toBe(true); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle unexpected Error objects', async () => { - const mockExecutor = () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', 
() => { + it('requires simulatorId session default', async () => { + const result = await handler({ elementRef: 'e1', duration: 1500 }); expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected string errors', async () => { - const mockExecutor = () => { - throw 'String error'; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([createNode()]); - const result = await runLogic(() => - long_pressLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - duration: 1500, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runLongPress( + { simulatorId, elementRef: 'e1', duration: 1500 }, + createFailingExecutor('long press failed'), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts index 43244a351..914a4f0df 100644 --- a/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/non_streaming_progress.test.ts @@ -1,9 +1,10 @@ import { describe, expect, it } from 'vitest'; import { + createCommandMatchingMockExecutor, createMockExecutor, createMockFileSystemExecutor, } from '../../../../test-utils/mock-executors.ts'; -import { runToolLogic } from 
'../../../../test-utils/test-helpers.ts'; +import { createMockToolHandlerContext, runToolLogic } from '../../../../test-utils/test-helpers.ts'; import { buttonLogic } from '../button.ts'; import { gestureLogic } from '../gesture.ts'; import { key_pressLogic } from '../key_press.ts'; @@ -15,6 +16,8 @@ import { swipeLogic } from '../swipe.ts'; import { tapLogic } from '../tap.ts'; import { touchLogic } from '../touch.ts'; import { type_textLogic } from '../type_text.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; +import { createNode, recordSnapshot } from './ui-action-test-helpers.ts'; const simulatorId = '12345678-1234-4234-8234-123456789012'; @@ -71,60 +74,76 @@ describe('ui automation non-streaming tools', () => { }, { name: 'long_press', - run: () => - long_pressLogic( - { simulatorId, x: 100, y: 200, duration: 1500 }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode()]); + return long_pressLogic( + { simulatorId, elementRef: 'e1', duration: 1500 }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Long press at (100, 200) for 1500ms simulated successfully.', + ); + }, }, { name: 'swipe', - run: () => - swipeLogic( - { simulatorId, x1: 10, y1: 20, x2: 30, y2: 40 }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + return swipeLogic( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Swipe from (10, 20) to (30, 40) simulated successfully.', + ); + }, }, { name: 'tap', - run: () => - tapLogic( - { simulatorId, x: 100, y: 200 }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode()]); + return tapLogic( + { simulatorId, elementRef: 'e1' }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Tap at (100, 200) simulated successfully.', + ); + }, }, 
{ name: 'touch', - run: () => - touchLogic( - { simulatorId, x: 100, y: 200, down: true }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode()]); + return touchLogic( + { simulatorId, elementRef: 'e1', down: true }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Touch event (touch down) at (100, 200) executed successfully.', + ); + }, }, { name: 'type_text', - run: () => - type_textLogic( - { simulatorId, text: 'Hello' }, + run: () => { + __resetRuntimeSnapshotStoreForTests(); + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + return type_textLogic( + { simulatorId, elementRef: 'e1', text: 'Hello' }, createMockExecutor({ success: true }), axeHelpers, - ), - expectedText: 'Text typing simulated successfully.', + ); + }, + expectedText: 'Text typed into elementRef e1 (5 characters) successfully.', }, ]; for (const testCase of cases) { const { result } = await runToolLogic(testCase.run); expect(result.events, `${testCase.name} should not emit progress events`).toEqual([]); - expect(result.text()).toContain(testCase.expectedText); + expect(result.isError()).toBe(false); + if (testCase.expectedText) { + expect(result.text()).toContain(testCase.expectedText); + } else { + expect(result.text().trim().length).toBeGreaterThan(0); + } } }); @@ -132,7 +151,19 @@ describe('ui automation non-streaming tools', () => { const { result } = await runToolLogic(() => screenshotLogic( { simulatorId, returnFormat: 'path' }, - createMockExecutor({ success: true, output: 'Screenshot saved' }), + createCommandMatchingMockExecutor({ + 'xcrun simctl list devices -j': { + output: JSON.stringify({ + devices: { + 'iOS 26.0': [{ udid: simulatorId, name: 'iPhone 17', state: 'Booted' }], + }, + }), + }, + 'xcrun simctl io': { output: 'Screenshot saved' }, + 'swift -e': { output: '368,800' }, + 'sips -Z': { output: 'optimized' }, + 'sips -g pixelWidth': { output: 'pixelWidth: 368\npixelHeight: 800' }, + }), 
createMockFileSystemExecutor(), { tmpdir: () => '/tmp', join: (...paths) => paths.join('/') }, { v4: () => 'test-uuid' }, @@ -143,8 +174,9 @@ describe('ui automation non-streaming tools', () => { expect(result.text()).toContain('Screenshot captured'); }); - it('returns snapshot_ui text from structured output without progress events', async () => { - const { result } = await runToolLogic(() => + it('returns snapshot_ui structured output without emitting progress events', async () => { + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic( { simulatorId, @@ -159,8 +191,17 @@ describe('ui automation non-streaming tools', () => { ); expect(result.events).toEqual([]); - expect(result.text()).toContain('Accessibility hierarchy retrieved successfully.'); - expect(result.text()).toContain('Accessibility Hierarchy'); - expect(result.text()).toContain('"type" : "Button"'); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + elements: [expect.objectContaining({ ref: 'e1' })], + }), + ); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts new file mode 100644 index 000000000..e8bb69054 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/runtime-snapshot.test.ts @@ -0,0 +1,562 @@ +import { describe, expect, it } from 'vitest'; +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import { + createRuntimeSnapshotRecord, + extractAccessibilityHierarchy, + getPrimaryRuntimeElement, + getRuntimeElementActivationPoint, + getRuntimeElementSwipePoints, + RuntimeSnapshotParseError, +} from '../shared/runtime-snapshot.ts'; + +const simulatorId = '12345678-1234-4234-8234-123456789012'; + +function createNode(overrides: Partial<AccessibilityNode> = {}): AccessibilityNode { + return { + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + children: [], + enabled: true, + custom_actions: [], + ...overrides, + }; +} + +describe('runtime snapshot normalization', () => { + it('flattens AX hierarchy into RuntimeSnapshotV1 public elements', () => { + const child = createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Email', + AXValue: 'cam@example.com', + AXUniqueId: 'email-field', + AXSelected: true, + frame: { x: 20, y: 80, width: 220, height: 44 }, + }); + const root = createNode({ + type: 'Window', + role: 'AXWindow', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [child], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + }), + ); + 
expect(snapshot.payload.elements.map((element) => element.ref)).toEqual(['e1', 'e2']); + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + ref: 'e2', + role: 'text-field', + label: 'Email', + value: 'cam@example.com', + identifier: 'email-field', + frame: { x: 20, y: 80, width: 220, height: 44 }, + state: { enabled: true, selected: true, visible: true }, + actions: expect.arrayContaining(['tap', 'typeText', 'longPress', 'touch']), + }), + ); + expect(snapshot.payload.screenHash).toMatch(/^[a-z0-9]+$/); + expect(snapshot.payload.seq).toBe(0); + expect(snapshot.payload.actions).toContainEqual({ + action: 'typeText', + elementRef: 'e2', + label: 'Email', + }); + expect(snapshot.elements[1]?.rawNode).toBe(child); + expect('rawNode' in snapshot.payload.elements[1]!).toBe(false); + expect(snapshot.elementsByRef.get('e2')?.rawNode).toBe(child); + }); + + it('derives deterministic screen hashes from normalized UI content', () => { + const uiHierarchy = [createNode({ AXLabel: 'Continue' }), createNode({ AXLabel: 'Cancel' })]; + + const first = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy, nowMs: 1_000 }); + const second = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy, nowMs: 2_000 }); + const changed = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [createNode({ AXLabel: 'Continue' }), createNode({ AXLabel: 'Done' })], + nowMs: 1_000, + }); + + expect(first.payload.screenHash).toBe(second.payload.screenHash); + expect(first.payload.screenHash).not.toBe(changed.payload.screenHash); + }); + + it('parses AXe describe-ui response envelopes', () => { + const responseText = JSON.stringify({ + elements: [createNode({ AXLabel: 'Continue' })], + }); + + const hierarchy = extractAccessibilityHierarchy(responseText); + + expect(hierarchy).toHaveLength(1); + expect(hierarchy[0]?.AXLabel).toBe('Continue'); + }); + + it('throws typed parse errors for invalid describe-ui responses', () => { + expect(() => 
extractAccessibilityHierarchy('not json')).toThrow(RuntimeSnapshotParseError); + expect(() => extractAccessibilityHierarchy(JSON.stringify({ value: [] }))).toThrow( + RuntimeSnapshotParseError, + ); + }); + + it('selects the primary element for semantic next steps', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [createNode({ AXLabel: 'Continue' })], + nowMs: 1_000, + }); + + expect(getPrimaryRuntimeElement(snapshot.payload, 'tap')?.label).toBe('Continue'); + expect(getPrimaryRuntimeElement(snapshot.payload, 'typeText')).toBe( + snapshot.payload.elements[0], + ); + }); + + it('does not infer swipeWithin on top-level applications with overflowing descendants', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Updated just now', + frame: { x: 140, y: 1200, width: 120, height: 20 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + ref: 'e1', + role: 'application', + label: 'Weather', + actions: [], + }), + ); + expect(snapshot.payload.actions).not.toContainEqual({ + action: 'swipeWithin', + elementRef: 'e1', + label: 'Weather', + }); + }); + + it('does not infer swipeWithin on top-level windows with overflowing descendants', () => { + const root = createNode({ + type: 'Window', + role: 'AXWindow', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Updated just now', + frame: { x: 140, y: 1200, width: 120, height: 20 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + 
expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + ref: 'e1', + role: 'window', + label: 'Weather', + actions: [], + }), + ); + }); + + it('does not infer swipeWithin when descendants fit inside the container', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Visible label', + frame: { x: 20, y: 200, width: 120, height: 20 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]?.actions).toEqual([]); + }); + + it('keeps sheet hosts swipeable when the current visible sheet content fits', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + AXLabel: 'Weather', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + AXValue: 'Expanded', + frame: { x: 163, y: 57, width: 76, height: 25 }, + }), + createNode({ + type: 'Switch', + role: 'AXSwitch', + AXLabel: 'Reduce transparency', + AXValue: '0', + frame: { x: 36, y: 603, width: 330, height: 28 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + ref: 'e1', + role: 'application', + label: 'Weather', + actions: ['swipeWithin'], + }), + ); + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'down')).toEqual({ + ok: true, + from: { x: 201, y: 372 }, + to: { x: 201, y: 677 }, + }); + }); + + it('removes actions from elements outside the viewport', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 390, height: 844 }, + children: [ + createNode({ + type: 'Switch', + role: 
'AXSwitch', + AXLabel: 'Reduce transparency', + AXValue: '0', + frame: { x: 40, y: 890, width: 300, height: 30 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'switch', + label: 'Reduce transparency', + value: '0', + state: expect.objectContaining({ visible: false }), + actions: [], + }), + ); + }); + + it('removes point-based actions from clipped elements with offscreen activation points', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Lisbon', + frame: { x: 20, y: 839.33, width: 362, height: 89 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'button', + label: 'Lisbon', + state: expect.objectContaining({ visible: true }), + actions: [], + }), + ); + }); + + it('uses an upper activation point for bottom-clipped visible targets', () => { + const root = createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 402, height: 874 }, + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + frame: { x: 324.87, y: 786.62, width: 49.93, height: 85.46 }, + }), + ], + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [root], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[1]?.actions).toContain('tap'); + expect(getRuntimeElementActivationPoint(snapshot.elements[1]!)).toEqual({ x: 350, y: 795 }); + }); + + it('does not mark unlabeled custom-action internals as tap targets', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + 
createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: undefined, + AXValue: undefined, + AXUniqueId: undefined, + identifier: undefined, + frame: { x: 30, y: 450, width: 80, height: 32 }, + custom_actions: ['Press'], + }), + createNode({ + type: 'Other', + role: 'AXGroup', + AXUniqueId: 'label-view', + frame: { x: 30, y: 500, width: 80, height: 32 }, + custom_actions: ['Press'], + }), + createNode({ + type: 'Other', + role: 'AXGroup', + AXUniqueId: 'named-custom-target', + frame: { x: 30, y: 550, width: 80, height: 32 }, + custom_actions: ['Press'], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.not.arrayContaining(['tap']), + }), + ); + expect(snapshot.payload.elements[1]).toEqual( + expect.objectContaining({ + role: 'other', + identifier: 'label-view', + actions: expect.not.arrayContaining(['tap']), + }), + ); + expect(snapshot.payload.elements[2]).toEqual( + expect.objectContaining({ + role: 'other', + identifier: 'named-custom-target', + actions: expect.arrayContaining(['tap']), + }), + ); + }); + + it('does not mark standalone other elements as swipeable', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: 'Suggested', + frame: { x: 30, y: 450, width: 80, height: 32 }, + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + label: 'Suggested', + actions: expect.not.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('does not infer swipeWithin on small other wrappers with overflowing descendants', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + frame: { x: 0, y: 0, width: 80, height: 80 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflow', + 
frame: { x: 10, y: 100, width: 100, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + actions: expect.not.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('infers swipeWithin on other containers with overflowing descendants', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Other', + role: 'AXGroup', + AXLabel: 'Scrollable panel', + frame: { x: 0, y: 0, width: 200, height: 200 }, + children: [ + createNode({ + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Overflow', + frame: { x: 10, y: 260, width: 100, height: 20 }, + }), + ], + }), + ], + nowMs: 1_000, + }); + + expect(snapshot.payload.elements[0]).toEqual( + expect.objectContaining({ + role: 'other', + label: 'Scrollable panel', + actions: expect.arrayContaining(['swipeWithin']), + }), + ); + }); + + it('derives trailing activation points for wide switch rows', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementActivationPoint(snapshot.elements[0]!)).toEqual({ x: 307, y: 903 }); + }); + + it('keeps full-screen swipe points away from unsafe viewport edges', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'Application', + role: 'AXApplication', + frame: { x: 0, y: 0, width: 402, height: 874 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'down')).toEqual({ + ok: true, + from: { x: 201, y: 131 }, + to: { x: 201, y: 743 }, + }); + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'left')).toEqual({ + ok: true, + from: { x: 342, y: 524 }, + to: { x: 60, y: 524 }, + }); + }); + + it('rejects unsafe 
swipe point derivation', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 1, height: 1 }, + }), + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 2, height: 100 }, + }), + ], + nowMs: 1_000, + }); + + expect(getRuntimeElementSwipePoints(snapshot.elements[0]!, 'up')).toMatchObject({ + ok: false, + message: expect.stringContaining('too small'), + }); + expect(getRuntimeElementSwipePoints(snapshot.elements[1]!, 'right')).toMatchObject({ + ok: false, + message: expect.stringContaining('non-degenerate'), + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts b/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts index 2184eff4c..57fd51f10 100644 --- a/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/screenshot.test.ts @@ -1,7 +1,6 @@ import { describe, it, expect, beforeEach } from 'vitest'; import * as z from 'zod'; import { - createMockExecutor, createMockFileSystemExecutor, mockProcess, } from '../../../../test-utils/mock-executors.ts'; @@ -16,6 +15,43 @@ import { } from '../screenshot.ts'; import { allText, runLogic } from '../../../../test-utils/test-helpers.ts'; +function isDeviceListCommand(command: string[]): boolean { + return command.join(' ') === 'xcrun simctl list devices -j'; +} + +function bootedDeviceListJson(simulatorId: string): string { + return JSON.stringify({ + devices: { + 'com.apple.CoreSimulator.SimRuntime.iOS-17-2': [ + { + udid: simulatorId, + name: 'iPhone 15 Pro', + state: 'Booted', + }, + ], + }, + }); +} + +function createBootedScreenshotMockExecutor(simulatorId: string) { + return async (command: string[]) => { + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson(simulatorId), + error: undefined, + process: mockProcess, + }; + } + return { + 
success: true, + output: 'Screenshot saved', + error: undefined, + process: mockProcess, + }; + }; +} + describe('Screenshot Plugin', () => { beforeEach(() => { sessionStore.clear(); @@ -68,6 +104,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('12345678-1234-4234-8234-123456789012'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -93,8 +137,7 @@ describe('Screenshot Plugin', () => { ), ); - // Should capture the screenshot command first - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -108,6 +151,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -133,7 +184,7 @@ describe('Screenshot Plugin', () => { ), ); - expect(capturedCommands[0]).toEqual([ + expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -147,6 +198,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('98765432-1098-7654-3210-987654321098'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -175,7 +234,7 @@ describe('Screenshot Plugin', () => { ), ); - expect(capturedCommands[0]).toEqual([ + 
expect(capturedCommands[1]).toEqual([ 'xcrun', 'simctl', 'io', @@ -189,6 +248,14 @@ describe('Screenshot Plugin', () => { const capturedCommands: string[][] = []; const trackingExecutor = async (command: string[]) => { capturedCommands.push(command); + if (isDeviceListCommand(command)) { + return { + success: true, + output: bootedDeviceListJson('12345678-1234-4234-8234-123456789012'), + error: undefined, + process: mockProcess, + }; + } return { success: true, output: 'Screenshot saved', @@ -215,24 +282,22 @@ describe('Screenshot Plugin', () => { ); // Verify the command structure but not the exact UUID since it's generated - expect(capturedCommands[0].slice(0, 5)).toEqual([ + expect(capturedCommands[1].slice(0, 5)).toEqual([ 'xcrun', 'simctl', 'io', '12345678-1234-4234-8234-123456789012', 'screenshot', ]); - expect(capturedCommands[0][5]).toMatch(/^\/tmp\/screenshot_[a-f0-9-]+\.png$/); + expect(capturedCommands[1][5]).toMatch(/^\/tmp\/screenshot_[a-f0-9-]+\.png$/); }); }); describe('Handler Behavior (Complete Literal Returns)', () => { it('should handle file reading errors', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Screenshot saved', - error: undefined, - }); + const mockExecutor = createBootedScreenshotMockExecutor( + '12345678-1234-4234-8234-123456789012', + ); const mockFileSystemExecutor = createMockFileSystemExecutor({ readFile: async () => { @@ -260,11 +325,9 @@ describe('Screenshot Plugin', () => { it('should handle file cleanup errors gracefully', async () => { const mockImageBuffer = Buffer.from('fake-image-data', 'utf8'); - const mockExecutor = createMockExecutor({ - success: true, - output: 'Screenshot saved', - error: undefined, - }); + const mockExecutor = createBootedScreenshotMockExecutor( + '12345678-1234-4234-8234-123456789012', + ); const mockFileSystemExecutor = createMockFileSystemExecutor({ readFile: async () => mockImageBuffer.toString('utf8'), @@ -366,6 +429,45 @@ describe('Screenshot Plugin', () 
=> { ).toBe(true); }); + it('fails before screenshot capture when the simulator is shutdown', async () => { + const capturedCommands: string[][] = []; + const mockExecutor = async (command: string[]) => { + capturedCommands.push(command); + return { + success: true, + output: JSON.stringify({ + devices: { + 'com.apple.CoreSimulator.SimRuntime.iOS-17-2': [ + { + udid: '12345678-1234-4234-8234-123456789012', + name: 'iPhone 15 Pro', + state: 'Shutdown', + }, + ], + }, + }), + error: undefined, + process: mockProcess, + }; + }; + + const result = await runLogic(() => + screenshotLogic( + { + simulatorId: '12345678-1234-4234-8234-123456789012', + }, + mockExecutor, + createMockFileSystemExecutor(), + ), + ); + + expect(result.isError).toBe(true); + const text = allText(result); + expect(text).toContain('Failed to capture screenshot.'); + expect(text).toContain('is Shutdown'); + expect(capturedCommands).toEqual([['xcrun', 'simctl', 'list', 'devices', '-j']]); + }); + it('should handle SystemError from command execution', async () => { const mockExecutor = async () => { throw new SystemError('System error occurred'); @@ -614,20 +716,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; @@ -689,20 +791,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: 
mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; @@ -756,20 +858,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; @@ -819,20 +921,20 @@ describe('Screenshot Plugin', () => { capturedCommands.push(command); const idx = commandIndex++; - // First call: screenshot command + // First call: simulator boot preflight if (idx === 0) { return { success: true, - output: 'Screenshot saved', + output: mockDeviceListJson, error: undefined, process: mockProcess, }; } - // Second call: list devices to get device name + // Second call: screenshot command if (idx === 1) { return { success: true, - output: mockDeviceListJson, + output: 'Screenshot saved', error: undefined, process: mockProcess, }; diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts b/src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts new file mode 100644 index 000000000..c03ed6219 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/snapshot-ui-state.test.ts @@ -0,0 +1,142 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { + 
__resetRuntimeSnapshotStoreForTests, + clearRuntimeSnapshot, + getRuntimeSnapshot, + getRuntimeSnapshotLookup, + getSnapshotUiWarning, + recordRuntimeSnapshot, + resolveElementRef, +} from '../shared/snapshot-ui-state.ts'; + +const simulatorId = '12345678-1234-4234-8234-123456789012'; + +const node: AccessibilityNode = { + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + children: [], + enabled: true, + custom_actions: [], + AXLabel: 'Continue', +}; + +describe('runtime snapshot store', () => { + beforeEach(() => { + __resetRuntimeSnapshotStoreForTests(); + }); + + it('stores runtime snapshots by simulator id', () => { + const nowMs = Date.now(); + const snapshot = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: [node], nowMs }); + + recordRuntimeSnapshot(snapshot); + + expect(getRuntimeSnapshot(simulatorId, nowMs + 1_000)).toBe(snapshot); + expect(getRuntimeSnapshotLookup(simulatorId, nowMs + 1_000)).toEqual({ + status: 'available', + snapshot, + snapshotAgeMs: 1_000, + }); + expect(getSnapshotUiWarning(simulatorId)).toBeNull(); + }); + + it('assigns monotonic snapshot sequences when recording snapshots', () => { + const first = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: [node], nowMs: 1_000 }); + const second = createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: [node], nowMs: 2_000 }); + + recordRuntimeSnapshot(first); + clearRuntimeSnapshot(simulatorId); + recordRuntimeSnapshot(second); + + expect(first.seq).toBe(1); + expect(first.payload.seq).toBe(1); + expect(second.seq).toBe(2); + expect(second.payload.seq).toBe(2); + expect(getRuntimeSnapshot(simulatorId, 2_000)).toBe(second); + }); + + it('expires stale snapshots and clears them from the store', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + + expect(getRuntimeSnapshotLookup(simulatorId, 62_000)).toEqual({ + status: 'expired', + 
snapshot: null, + snapshotAgeMs: 61_000, + }); + expect(getRuntimeSnapshot(simulatorId, 62_000)).toBeNull(); + }); + + it('clears snapshots explicitly', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + + clearRuntimeSnapshot(simulatorId); + + expect(getRuntimeSnapshotLookup(simulatorId)).toEqual({ status: 'missing', snapshot: null }); + }); + + it('resolves actionable element refs', () => { + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + + expect(resolveElementRef(simulatorId, 'e1', 'tap', 2_000)).toEqual({ + ok: true, + snapshot, + element: snapshot.elements[0], + snapshotAgeMs: 1_000, + }); + }); + + it('returns typed recoverable errors for missing, expired, not-found, and not-actionable refs', () => { + expect(resolveElementRef(simulatorId, 'e1', 'tap', 1_000)).toEqual({ + ok: false, + error: expect.objectContaining({ code: 'SNAPSHOT_MISSING' }), + }); + + const snapshot = createRuntimeSnapshotRecord({ + simulatorId, + uiHierarchy: [node], + nowMs: 1_000, + }); + recordRuntimeSnapshot(snapshot); + expect(resolveElementRef(simulatorId, 'e1', 'tap', 62_000)).toEqual({ + ok: false, + error: expect.objectContaining({ code: 'SNAPSHOT_EXPIRED', snapshotAgeMs: 61_000 }), + }); + + recordRuntimeSnapshot(snapshot); + expect(resolveElementRef(simulatorId, 'e404', 'tap', 2_000)).toEqual({ + ok: false, + error: expect.objectContaining({ + code: 'ELEMENT_REF_NOT_FOUND', + elementRef: 'e404', + snapshotAgeMs: 1_000, + }), + }); + + expect(resolveElementRef(simulatorId, 'e1', 'typeText', 2_000)).toEqual({ + ok: false, + error: expect.objectContaining({ + code: 'TARGET_NOT_ACTIONABLE', + elementRef: 'e1', + snapshotAgeMs: 1_000, + }), + }); + }); +}); diff --git a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts 
b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts index 682d96c7b..5628417f7 100644 --- a/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/snapshot_ui.test.ts @@ -4,7 +4,15 @@ import { createMockExecutor, createNoopExecutor } from '../../../../test-utils/m import type { CommandExecutor } from '../../../../utils/execution/index.ts'; import { schema, handler, snapshot_uiLogic } from '../snapshot_ui.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic } from '../../../../test-utils/test-helpers.ts'; +import { + allText, + createMockToolHandlerContext, + runLogic, +} from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; describe('Snapshot UI Plugin', () => { describe('Export Field Validation (Literal)', () => { @@ -16,6 +24,7 @@ describe('Snapshot UI Plugin', () => { const schemaObject = z.object(schema); expect(schemaObject.safeParse({}).success).toBe(true); + expect(schemaObject.safeParse({ sinceScreenHash: 'screen-hash' }).success).toBe(true); const withSimId = schemaObject.safeParse({ simulatorId: '12345678-1234-4234-8234-123456789012', @@ -69,7 +78,9 @@ describe('Snapshot UI Plugin', () => { return mockExecutor(...args); }; - const result = await runLogic(() => + __resetRuntimeSnapshotStoreForTests(); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic( { simulatorId: '12345678-1234-4234-8234-123456789012', @@ -86,18 +97,613 @@ describe('Snapshot UI Plugin', () => { { env: {} }, ]); - expect(result.isError).toBeFalsy(); - const text = allText(result); - expect(text).toContain('Accessibility hierarchy retrieved successfully.'); - expect(text).toContain('Accessibility Hierarchy'); - expect(text).toContain('"type" : "Button"'); - expect(text).toContain('"width" : 50'); - expect(text).toContain('Use 
frame coordinates for tap/swipe'); - expect(result.nextStepParams).toEqual({ - snapshot_ui: { simulatorId: '12345678-1234-4234-8234-123456789012' }, - tap: { simulatorId: '12345678-1234-4234-8234-123456789012', x: 0, y: 0 }, - screenshot: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + expect(result.isError()).toBe(false); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + expect(ctx.structuredOutput?.result.kind).toBe('capture-result'); + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: '12345678-1234-4234-8234-123456789012', + screenHash: expect.any(String), + seq: 1, + elements: [ + expect.objectContaining({ + ref: 'e1', + role: 'button', + frame: { x: 100, y: 200, width: 50, height: 30 }, + state: { enabled: true, visible: true }, + actions: expect.arrayContaining(['tap']), + }), + ], + }), + ); + expect( + capture && 'type' in capture && capture.type === 'runtime-snapshot' ? capture.actions : [], + ).toContainEqual({ action: 'tap', elementRef: 'e1' }); + expect( + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? 'rawNode' in capture.elements[0]! + : true, + ).toBe(false); + const storedSnapshot = getRuntimeSnapshot('12345678-1234-4234-8234-123456789012'); + expect(storedSnapshot?.payload).toBe(capture); + const elementRef = + capture && 'type' in capture && capture.type === 'runtime-snapshot' + ? 
capture.elements[0]?.ref + : undefined; + expect(ctx.nextSteps).toEqual([ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + predicate: 'settled', + }, + }, + { + label: 'Tap an elementRef', + tool: 'tap', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef, + }, + }, + { + label: 'Take screenshot for verification', + tool: 'screenshot', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + ]); + }); + + it('should return unchanged capture when sinceScreenHash matches the current screen hash', async () => { + const uiHierarchy = + '{"elements": [{"type": "Button", "frame": {"x": 100, "y": 200, "width": 50, "height": 30}}]}'; + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const first = createMockToolHandlerContext(); + await first.run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + const firstCapture = + first.ctx.structuredOutput?.result.kind === 'capture-result' + ? first.ctx.structuredOutput.result.capture + : undefined; + const screenHash = + firstCapture && 'screenHash' in firstCapture ? 
firstCapture.screenHash : undefined; + expect(screenHash).toEqual(expect.any(String)); + + const second = createMockToolHandlerContext(); + await second.run(() => + snapshot_uiLogic( + { + simulatorId: '12345678-1234-4234-8234-123456789012', + sinceScreenHash: screenHash, + }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + second.ctx.structuredOutput?.result.kind === 'capture-result' + ? second.ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual({ + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: '12345678-1234-4234-8234-123456789012', + screenHash, + seq: 2, }); + expect(getRuntimeSnapshot('12345678-1234-4234-8234-123456789012')?.seq).toBe(2); + expect(second.ctx.nextSteps?.find((step) => step.tool === 'tap')).toBeUndefined(); + }); + + it('should return full runtime snapshot when sinceScreenHash differs from the current screen hash', async () => { + const uiHierarchy = + '{"elements": [{"type": "Button", "frame": {"x": 100, "y": 200, "width": 50, "height": 30}}]}'; + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { + simulatorId: '12345678-1234-4234-8234-123456789012', + sinceScreenHash: 'different-screen-hash', + }, + mockExecutor, + mockAxeHelpers, + ), + ); + + const capture = + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.capture + : undefined; + expect(capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: '12345678-1234-4234-8234-123456789012', + screenHash: expect.any(String), + seq: 1, + elements: [expect.objectContaining({ ref: 'e1' })], + }), + ); + }); + + it('should omit tap next-step guidance when no tap targets exist', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'StaticText', + role: 'AXStaticText', + AXLabel: 'Loading weather...', + frame: { x: 20, y: 100, width: 200, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps).toEqual([ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { + simulatorId: '12345678-1234-4234-8234-123456789012', + predicate: 'settled', + }, + }, + { + label: 'Take screenshot for verification', + tool: 'screenshot', + params: { simulatorId: '12345678-1234-4234-8234-123456789012' }, + }, + ]); + }); + + it('should prefer a non-text-field tap target in next steps', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + frame: { x: 20, y: 40, width: 200, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Submit', + frame: { x: 20, y: 100, width: 100, height: 44 }, + }, + ], + }); + const 
mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should prefer a useful digit over calculator utility controls for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'C', + frame: { x: 20, y: 40, width: 70, height: 70 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '±', + frame: { x: 100, y: 40, width: 70, height: 70 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '%', + frame: { x: 180, y: 40, width: 70, height: 70 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '7', + frame: { x: 20, y: 120, width: 70, height: 70 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e4', + }); + }); + + it('should prefer an unselected segmented choice over 
an already-selected choice for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: '°F', + AXValue: 'selected', + frame: { x: 20, y: 40, width: 70, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: '°C', + AXValue: 'not selected', + frame: { x: 100, y: 40, width: 70, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should skip low-value controls for tap next-step guidance when another tap target exists', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 150, y: 10, width: 80, height: 20 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + frame: { x: 300, y: 40, width: 60, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Clear search', + frame: { x: 30, y: 90, width: 120, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Berlin, Germany', + frame: { x: 20, y: 150, width: 320, height: 80 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + 
+ __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e4', + }); + }); + + it('should not prefer destructive controls for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Remove', + AXIdentifier: 'trash', + frame: { x: 300, y: 180, width: 40, height: 40 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland, 1:24 PM · Light Rain', + frame: { x: 20, y: 140, width: 300, height: 80 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should not suggest the sheet grabber as a tap next step', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Sheet Grabber', + frame: { x: 150, y: 10, width: 80, height: 20 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Close', + frame: { x: 300, y: 40, width: 60, height: 44 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + 
process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e2', + }); + }); + + it('should prefer content-rich targets for tap next-step guidance', async () => { + const uiHierarchy = JSON.stringify({ + elements: [ + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Portland', + AXIdentifier: 'weather.locationButton', + frame: { x: 20, y: 40, width: 160, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'Settings', + AXIdentifier: 'weather.settingsButton', + frame: { x: 320, y: 40, width: 44, height: 44 }, + }, + { + type: 'Button', + role: 'AXButton', + AXLabel: 'PRECIP., 78%, Next 24 hours', + AXIdentifier: 'weather.precipitationCard', + frame: { x: 20, y: 260, width: 340, height: 140 }, + }, + ], + }); + const mockExecutor = createMockExecutor({ + success: true, + output: uiHierarchy, + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + __resetRuntimeSnapshotStoreForTests(); + const { ctx, run } = createMockToolHandlerContext(); + await run(() => + snapshot_uiLogic( + { simulatorId: '12345678-1234-4234-8234-123456789012' }, + mockExecutor, + mockAxeHelpers, + ), + ); + + expect(ctx.nextSteps?.find((step) => step.tool === 'tap')?.params).toEqual({ + simulatorId: '12345678-1234-4234-8234-123456789012', + elementRef: 'e3', + }); + }); + + it('should clear runtime snapshot store when AXe output cannot be parsed', async () => { + 
__resetRuntimeSnapshotStoreForTests(); + const simulatorId = '12345678-1234-4234-8234-123456789012'; + const seededExecutor = createMockExecutor({ + success: true, + output: + '{"elements": [{"type": "Button", "frame": {"x": 1, "y": 2, "width": 3, "height": 4}}]}', + error: undefined, + process: { pid: 12345 }, + }); + const mockAxeHelpers = { + getAxePath: () => '/usr/local/bin/axe', + getBundledAxeEnvironment: () => ({}), + }; + + await runLogic(() => snapshot_uiLogic({ simulatorId }, seededExecutor, mockAxeHelpers)); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); + + const invalidJsonExecutor = createMockExecutor({ + success: true, + output: 'not json', + error: undefined, + process: { pid: 12345 }, + }); + const { ctx, result, run } = createMockToolHandlerContext(); + await run(() => snapshot_uiLogic({ simulatorId }, invalidJsonExecutor, mockAxeHelpers)); + + expect(result.isError()).toBe(true); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + expect( + ctx.structuredOutput?.result.kind === 'capture-result' + ? 
ctx.structuredOutput.result.uiError + : undefined, + ).toEqual( + expect.objectContaining({ + code: 'SNAPSHOT_PARSE_FAILED', + }), + ); }); it('should handle DependencyError when axe is not available', async () => { diff --git a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts index 76a436239..53228a3ba 100644 --- a/src/mcp/tools/ui-automation/__tests__/swipe.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/swipe.test.ts @@ -1,228 +1,156 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor, mockProcess } from '../../../../test-utils/mock-executors.ts'; -import { SystemError } from '../../../../utils/errors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; - -import { schema, handler, type AxeHelpers, swipeLogic, type SwipeParams } from '../swipe.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic } from '../../../../test-utils/test-helpers.ts'; - -function createMockAxeHelpers(): AxeHelpers { - return { - getAxePath: () => '/mocked/axe/path', - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; -} - -function createMockAxeHelpersWithNullPath(): AxeHelpers { - return { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { schema, handler, swipeLogic } from '../swipe.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function 
runSwipe( + params: Parameters<typeof swipeLogic>[0], + executor = createTrackingExecutor().executor, +): Promise<UiActionResultDomainResult> { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => swipeLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; } describe('Swipe Tool', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('exposes withinElementRef and direction without coordinate fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('withinElementRef'); + expect(schema).toHaveProperty('direction'); + expect(schema).not.toHaveProperty('x1'); + expect(schema).not.toHaveProperty('y1'); + expect(schema).not.toHaveProperty('x2'); + expect(schema).not.toHaveProperty('y2'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - + expect(schemaObject.safeParse({ withinElementRef: 'e1', direction: 'up' }).success).toBe( + true, + ); + expect( + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'diagonal' }).success, + ).toBe(false); + expect(schemaObject.safeParse({ direction: 'up' }).success).toBe(false); + expect(schemaObject.safeParse({ withinElementRef: 'e1' }).success).toBe(false); expect( schemaObject.safeParse({ - x1: 100, - y1: 200, - x2: 300, - y2: 400, + withinElementRef: 'e1', + direction: 'down', + duration: 1.5, + distance: 10, + preDelay: 0.5, + postDelay: 0.25, }).success, ).toBe(true); - expect( - schemaObject.safeParse({ - x1: 100.5, - y1: 200, - x2: 300, - y2: 400, - }).success, + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', duration: 0 }).success, ).toBe(false); - expect( - schemaObject.safeParse({ - x1: 100, - y1: 200, - x2: 300, - y2: 400, - duration: -1, - }).success, + schemaObject.safeParse({ 
withinElementRef: 'e1', direction: 'down', distance: 0 }).success, ).toBe(false); - expect( - schemaObject.safeParse({ - x1: 100, - y1: 200, - x2: 300, - y2: 400, - duration: 1.5, - delta: 10, - preDelay: 0.5, - postDelay: 0.2, - }).success, - ).toBe(true); - - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record<string, unknown>)).toBe(false); + schemaObject.safeParse({ withinElementRef: 'e1', direction: 'down', preDelay: 10.1 }) + .success, + ).toBe(false); }); }); describe('Command Generation', () => { - it('should generate correct axe command for basic swipe', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/mocked/axe/path', - 'swipe', - '--start-x', - '100', - '--start-y', - '200', - '--end-x', - '300', - '--end-y', - '400', - '--udid', - '12345678-1234-4234-8234-123456789012', + it('derives safe upward swipe points within the referenced element', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), ]); - }); + const { calls, executor } = createTrackingExecutor(); - it('should generate correct axe command for swipe with duration', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe 
completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 50, - y1: 75, - x2: 250, - y2: 350, - duration: 1.5, - }, - trackingExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(capturedCommand).toEqual([ + expect(result).toMatchObject({ + didError: false, + action: { type: 'swipe', withinElementRef: 'e1', direction: 'up' }, + }); + expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'swipe', '--start-x', - '50', + '100', '--start-y', - '75', + '340', '--end-x', - '250', + '100', '--end-y', - '350', - '--duration', - '1.5', + '60', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for swipe with all optional parameters', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 0, - y1: 0, - x2: 500, - y2: 800, - duration: 2.0, - delta: 10, - preDelay: 0.5, - postDelay: 0.3, - }, - trackingExecutor, - mockAxeHelpers, - ), + it('preserves optional AXe swipe flags', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 200, height: 400 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runSwipe( + { + simulatorId, + withinElementRef: 'e1', + direction: 'right', + duration: 2, + distance: 10, + preDelay: 0.5, + postDelay: 0.25, + }, + executor, ); - expect(capturedCommand).toEqual([ + 
expect(result.action).toMatchObject({ + type: 'swipe', + withinElementRef: 'e1', + direction: 'right', + durationSeconds: 2, + }); + expect(calls[0]?.command).toEqual([ '/mocked/axe/path', 'swipe', '--start-x', - '0', + '30', '--start-y', - '0', + '200', '--end-x', - '500', + '170', '--end-y', - '800', + '200', '--duration', '2', '--delta', @@ -230,286 +158,148 @@ describe('Swipe Tool', () => { '--pre-delay', '0.5', '--post-delay', - '0.3', - '--udid', - '12345678-1234-4234-8234-123456789012', - ]); - }); - - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'swipe completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/path/to/bundled/axe', - getBundledAxeEnvironment: () => ({ AXE_PATH: '/some/path' }), - }; - - await runLogic(() => - swipeLogic( - { - simulatorId: 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', - x1: 150, - y1: 250, - x2: 400, - y2: 600, - delta: 5, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', - 'swipe', - '--start-x', - '150', - '--start-y', - '250', - '--end-x', - '400', - '--end-y', - '600', - '--delta', - '5', + '0.25', '--udid', - 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', + simulatorId, ]); }); }); - describe('Handler Behavior', () => { - it('should return error for missing simulatorId via handler', async () => { - const result = await handler({ x1: 100, y1: 200, x2: 300, y2: 400 }); - - expect(result.isError).toBe(true); - expect(result.content[0].type).toBe('text'); - expect(allText(result)).toContain('Missing required session defaults'); - expect(allText(result)).toContain('simulatorId is required'); - expect(allText(result)).toContain('session-set-defaults'); - }); + describe('Resolution failures', () => { + it('returns 
TARGET_NOT_ACTIONABLE without calling AXe when the frame is too small', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 1, height: 1 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - it('should return validation error for missing x1 once simulator default exists', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, + ); - const result = await handler({ - y1: 200, - x2: 300, - y2: 400, + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_NOT_ACTIONABLE', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), }); - - expect(result.isError).toBe(true); - expect(result.content[0].type).toBe('text'); - expect(allText(result)).toContain('Parameter validation failed'); - expect(allText(result)).toContain('x1: Invalid input: expected number, received undefined'); + expect(result.uiError).not.toHaveProperty('withinElementRef'); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); - it('should return success for valid swipe execution', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'swipe completed', - error: '', - }); - - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + it('returns TARGET_NOT_ACTIONABLE without calling AXe when derived swipe points are degenerate', async () => { + recordSnapshot([ + createNode({ + type: 'ScrollView', + role: 'AXScrollArea', + frame: { x: 0, y: 0, width: 2, height: 100 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - 
expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Swipe from (100, 200) to (300, 400) simulated successfully.', + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'right' }, + executor, ); - }); - it('should return success for swipe with duration', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'swipe completed', - error: '', + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_NOT_ACTIONABLE', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), }); + expect(result.uiError).not.toHaveProperty('withinElementRef'); + expect(calls).toEqual([]); + }); + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - duration: 1.5, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Swipe from (100, 200) to (300, 400) duration=1.5s simulated successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should handle DependencyError when axe is not available', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'swipe completed', - error: '', - }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot( + [createNode({ type: 'ScrollView', role: 'AXScrollArea' })], + Date.now() - 61_000, + ); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result 
= await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should handle AxeError from failed command execution', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'axe command failed', - }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runSwipe( + { simulatorId, withinElementRef: 'e404', direction: 'up' }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to simulate swipe.'); - expect(text).toContain('axe command failed'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should handle SystemError from command execution', async () => { - // Override the executor to throw SystemError for this test - const systemErrorExecutor = async () => { - throw new SystemError('System error occurred'); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: 
'12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - systemErrorExecutor, - mockAxeHelpers, - ), + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ type: 'Button', role: 'AXButton' })]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: System error occurred'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle unexpected Error objects', async () => { - // Override the executor to throw an unexpected Error for this test - const unexpectedErrorExecutor = async () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - unexpectedErrorExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await handler({ withinElementRef: 'e1', direction: 'up' }); expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: Unexpected error'); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected string errors', async () => { - // Override the executor to throw a string error for this test - const 
stringErrorExecutor = async () => { - throw 'String error'; - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - const result = await runLogic(() => - swipeLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x1: 100, - y1: 200, - x2: 300, - y2: 400, - }, - stringErrorExecutor, - mockAxeHelpers, - ), + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([createNode({ type: 'ScrollView', role: 'AXScrollArea' })]); + + const result = await runSwipe( + { simulatorId, withinElementRef: 'e1', direction: 'up' }, + createFailingExecutor('swipe failed'), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(result.uiError).not.toHaveProperty('withinElementRef'); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/tap.test.ts b/src/mcp/tools/ui-automation/__tests__/tap.test.ts index 092cc8820..ec994fcac 100644 --- a/src/mcp/tools/ui-automation/__tests__/tap.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/tap.test.ts @@ -1,198 +1,79 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor } from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; - -import { schema, handler, type AxeHelpers, tapLogic } from '../tap.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic } from 
'../../../../test-utils/test-helpers.ts'; - -function createMockAxeHelpers(): AxeHelpers { - return { - getAxePath: () => '/mocked/axe/path', - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; -} - -function createMockAxeHelpersWithNullPath(): AxeHelpers { - return { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({ SOME_ENV: 'value' }), - }; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { schema, handler, tapLogic } from '../tap.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createSequencedExecutor, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runTap( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => tapLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; } describe('Tap Plugin', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('exposes elementRef-only targeting fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).not.toHaveProperty('x'); + expect(schema).not.toHaveProperty('y'); + expect(schema).not.toHaveProperty('id'); + expect(schema).not.toHaveProperty('label'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - - expect(schemaObject.safeParse({ x: 100, y: 200 }).success).toBe(true); - - expect(schemaObject.safeParse({ id: 'loginButton' }).success).toBe(true); - - 
expect(schemaObject.safeParse({ label: 'Log in' }).success).toBe(true); - - expect(schemaObject.safeParse({ x: 100, y: 200, id: 'loginButton' }).success).toBe(true); - + expect(schemaObject.safeParse({ elementRef: 'e1' }).success).toBe(true); + expect(schemaObject.safeParse({}).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: '' }).success).toBe(false); expect( - schemaObject.safeParse({ x: 100, y: 200, id: 'loginButton', label: 'Log in' }).success, + schemaObject.safeParse({ elementRef: 'e1', preDelay: 0.5, postDelay: 1 }).success, ).toBe(true); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - preDelay: 0.5, - postDelay: 1, - }).success, - ).toBe(true); - - expect( - schemaObject.safeParse({ - x: 3.14, - y: 200, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 3.14, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - preDelay: -1, - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - x: 100, - y: 200, - postDelay: -1, - }).success, - ).toBe(false); - - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1', preDelay: 10.1 }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1', postDelay: 10.1 }).success).toBe(false); }); }); describe('Command Generation', () => { - let callHistory: Array<{ - command: string[]; - logPrefix?: string; - useShell?: boolean; - opts?: { env?: Record; cwd?: string }; - }>; - - beforeEach(() => { - callHistory = []; - }); + it('uses AXe id targeting when the referenced element has an identifier', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { calls, executor } = createTrackingExecutor(); - it('should generate correct axe command 
with minimal parameters', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ - '/mocked/axe/path', - 'tap', - '-x', - '100', - '-y', - '200', - '--udid', - '12345678-1234-4234-8234-123456789012', - ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); - }); - - it('should generate correct axe command with element id target', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - id: 'loginButton', - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ + expect(result).toMatchObject({ didError: false, action: { type: 'tap', elementRef: 'e1' } }); + expect(calls).toHaveLength(1); + expect(calls[0]).toEqual({ command: [ '/mocked/axe/path', 'tap', '--id', - 'loginButton', + 
'continue-button', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], logPrefix: '[AXe]: tap', useShell: false, @@ -200,515 +81,355 @@ describe('Tap Plugin', () => { }); }); - it('should generate correct axe command with element label target', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); + it('clears the cached runtime snapshot after a successful tap', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); + const { executor } = createTrackingExecutor(); - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - label: 'Log in', - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ - '/mocked/axe/path', - 'tap', - '--label', - 'Log in', - '--udid', - '12345678-1234-4234-8234-123456789012', - ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + expect(result.didError).toBe(false); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); - it('should prefer coordinates over id/label when both are provided', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, 
opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 120, - y: 240, - id: 'loginButton', - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + it('includes element type when tapping a referenced element with a shared identifier', async () => { + recordSnapshot([ + createNode({ + type: 'Group', + role: 'AXGroup', + AXUniqueId: 'shared-action', + children: [ + createNode({ + type: 'Button', + role: 'AXButton', + AXUniqueId: 'shared-action', + AXLabel: 'Continue', + }), + ], + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTap({ simulatorId, elementRef: 'e2' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '120', - '-y', - '240', + '--id', + 'shared-action', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ]); }); - it('should generate correct axe command with pre-delay', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 150, - y: 300, - preDelay: 0.5, - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); + it('uses coordinates immediately when the snapshot already has duplicate selector matches', async () => 
{ + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + AXUniqueId: 'trash', + AXLabel: 'Remove', + }), + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 300, y: 400, width: 50, height: 80 }, + AXUniqueId: 'trash', + AXLabel: 'Remove', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTap({ simulatorId, elementRef: 'e2' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + ['/mocked/axe/path', 'tap', '-x', '325', '-y', '440', '--udid', simulatorId], + ]); + }); - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + it('falls back to the resolved center when selector tap is ambiguous', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'shared-action', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'Multiple accessibility elements matched selector' }, + { success: true, output: 'tapped by coordinate' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '150', - '-y', - '300', - '--pre-delay', - '0.5', + '--id', + 'shared-action', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ]); }); - it('should generate correct axe command with post-delay', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - 
logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 250, - y: 400, - postDelay: 1.0, - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + it('falls back to the resolved center when selector tap reports a parenthesized match count', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'weather.locationsSheet', + AXLabel: 'Clear search', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { + success: false, + error: + "Multiple (2) accessibility elements matched --id 'weather.locationsSheet'. 
No tap performed.", + }, + { success: true, output: 'tapped by coordinate' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '250', - '-y', - '400', - '--post-delay', - '1', + '--id', + 'weather.locationsSheet', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ]); }); - it('should generate correct axe command with both delays', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - }); - - const wrappedExecutor = async ( - command: string[], - logPrefix?: string, - useShell?: boolean, - opts?: { env?: Record; cwd?: string }, - ) => { - callHistory.push({ command, logPrefix, useShell, opts }); - return mockExecutor(command, logPrefix, useShell, opts); - }; - - const mockAxeHelpers = createMockAxeHelpers(); - - await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 350, - y: 500, - preDelay: 0.3, - postDelay: 0.7, - }, - wrappedExecutor, - mockAxeHelpers, - ), - ); - - expect(callHistory).toHaveLength(1); - expect(callHistory[0]).toEqual({ - command: [ + it('falls back to the resolved center when selector tap reports no match', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: undefined, + AXIdentifier: undefined, + AXLabel: 'Portland, 1:24 PM · Light Rain, 52°, H:55° L:48°', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { + success: false, + error: + "No accessibility element matched --label 'Portland, 1:24 PM · Light Rain, 52°, H:55° L:48°'. 
No tap performed.", + }, + { success: true, output: 'tapped by coordinate' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ '/mocked/axe/path', 'tap', - '-x', - '350', - '-y', - '500', - '--pre-delay', - '0.3', - '--post-delay', - '0.7', + '--label', + 'Portland, 1:24 PM · Light Rain, 52°, H:55° L:48°', + '--element-type', + 'Button', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ], - logPrefix: '[AXe]: tap', - useShell: false, - opts: { env: { SOME_ENV: 'value' } }, - }); + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ]); }); - }); - describe('Plugin Handler Validation', () => { - it('should require simulatorId session default when not provided', async () => { - const result = await handler({ - x: 100, - y: 200, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); + it('does not fall back for unrelated failures that mention multiple', async () => { + recordSnapshot([ + createNode({ + type: 'Button', + role: 'AXButton', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'shared-action', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'Failed after multiple retry attempts' }, + { success: true, output: 'should not run' }, + ]); + + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(result.didError).toBe(true); + expect(calls).toHaveLength(1); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'tap', + '--id', + 'shared-action', + '--element-type', + 'Button', + '--udid', + simulatorId, + ]); }); - it('should return validation error for missing x coordinate', async () 
=> { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await handler({ - y: 200, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('x: X coordinate is required when y is provided.'); + it('falls back to the referenced element center when no identifier exists', async () => { + recordSnapshot([ + createNode({ frame: { x: 10, y: 20, width: 100, height: 40 }, AXLabel: undefined }), + ]); + const { calls, executor } = createTrackingExecutor(); + + await runTap({ simulatorId, elementRef: 'e1', preDelay: 0.25, postDelay: 0.5 }, executor); + + expect(calls).toHaveLength(1); + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'tap', + '-x', + '60', + '-y', + '40', + '--pre-delay', + '0.25', + '--post-delay', + '0.5', + '--udid', + simulatorId, + ]); }); - it('should return validation error for missing y coordinate', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await handler({ - x: 100, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('y: Y coordinate is required when x is provided.'); + it('uses a touch down/up activation for wide switch rows', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + AXLabel: 'Reduce transparency', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + await runTap({ simulatorId, elementRef: 'e1' }, executor); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', + 'touch', + '-x', + '307', + '-y', + '903', + '--down', + '--up', + '--udid', + simulatorId, + ]); }); + }); - it('should return validation error when both id and label are provided 
without coordinates', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); - - const result = await handler({ - id: 'loginButton', - label: 'Log in', - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('id: Provide either id or label, not both.'); - }); + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - it('should return validation error for non-integer x coordinate', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - const result = await handler({ - x: 3.14, - y: 200, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('x: X coordinate must be an integer'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should return validation error for non-integer y coordinate', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode()], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const result = await handler({ - x: 100, - y: 3.14, - }); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('y: Y coordinate must be an integer'); + expect(result.didError).toBe(true); + 
expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should return validation error for negative preDelay', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); - const result = await handler({ - x: 100, - y: 200, - preDelay: -1, - }); + const result = await runTap({ simulatorId, elementRef: 'e404' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('preDelay: Pre-delay must be non-negative'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should return validation error for negative postDelay', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ enabled: false })]); + const { calls, executor } = createTrackingExecutor(); - const result = await handler({ - x: 100, - y: 200, - postDelay: -1, - }); + const result = await runTap({ simulatorId, elementRef: 'e1' }, executor); - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('postDelay: Post-delay must be non-negative'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); + expect(getRuntimeSnapshot(simulatorId)).not.toBeNull(); }); }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should return DependencyError when axe binary is not 
found', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'Tap completed', - error: undefined, - }); - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - preDelay: 0.5, - postDelay: 1.0, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default before validation', async () => { + const result = await handler({ elementRef: 'e1' }); expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle DependencyError when axe binary not found (second test)', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'Coordinates out of bounds', - }); + it('returns UI_STATE_CHANGED when identifier-based AXe tap fails after ref resolution', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button' })]); - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTap( + { simulatorId, elementRef: 'e1' }, + createFailingExecutor('element not found'), ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle DependencyError when axe binary not found (third test)', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'System error occurred', + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'UI_STATE_CHANGED', + 
elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), }); - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); - it('should handle DependencyError when axe binary not found (fourth test)', async () => { - const mockExecutor = async () => { - throw new Error('ENOENT: no such file or directory'); - }; - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + it('returns ACTION_FAILED when coordinate-based AXe tap fails after ref resolution', async () => { + recordSnapshot([createNode({ AXLabel: undefined })]); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle DependencyError when axe binary not found (fifth test)', async () => { - const mockExecutor = async () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); - - it('should handle DependencyError when axe binary not found (sixth test)', async () => { - const mockExecutor = async () => { - throw 'String error'; - }; - - const mockAxeHelpers = createMockAxeHelpersWithNullPath(); - - const result = await runLogic(() => - tapLogic( - { - simulatorId: 
'12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTap( + { simulatorId, elementRef: 'e1' }, + createFailingExecutor('tap failed'), ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/touch.test.ts b/src/mcp/tools/ui-automation/__tests__/touch.test.ts index fd5b1641c..f12659a4f 100644 --- a/src/mcp/tools/ui-automation/__tests__/touch.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/touch.test.ts @@ -1,657 +1,238 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { createMockExecutor, mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { __resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; import { schema, handler, touchLogic } from '../touch.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic } from '../../../../test-utils/test-helpers.ts'; +import { + createFailingExecutor, + createMockAxeHelpers, + createNode, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runTouch( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => 
touchLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; +} describe('Touch Plugin', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('exposes elementRef and touch flags without coordinate fields', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('down'); + expect(schema).toHaveProperty('up'); + expect(schema).not.toHaveProperty('x'); + expect(schema).not.toHaveProperty('y'); - it('should validate schema fields with safeParse', () => { - const schemaObj = z.object(schema); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200, - down: true, - }).success, - ).toBe(true); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200, - up: true, - }).success, - ).toBe(true); - - expect( - schemaObj.safeParse({ - x: 100.5, - y: 200, - down: true, - }).success, - ).toBe(false); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200.5, - down: true, - }).success, - ).toBe(false); - - expect( - schemaObj.safeParse({ - x: 100, - y: 200, - down: true, - delay: -1, - }).success, - ).toBe(false); - - const withSimId = schemaObj.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ elementRef: 'e1', down: true }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', up: true }).success).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', down: true, delay: -1 }).success).toBe( + false, + ); + expect(schemaObject.safeParse({ elementRef: 'e1', down: true, delay: 10.1 }).success).toBe( + 
false, + ); + expect(schemaObject.safeParse({ down: true }).success).toBe(false); }); }); - describe('Handler Requirements', () => { - it('should require simulatorId session default', async () => { - const result = await handler({ - x: 100, - y: 200, - down: true, - }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); - }); + describe('Command Generation', () => { + it('touches down at the referenced element center', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - it('should surface parameter validation errors when defaults exist', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - const result = await handler({ - y: 200, - down: true, + expect(result).toMatchObject({ + didError: false, + action: { type: 'touch', elementRef: 'e1', event: 'touch down' }, }); - - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('x: Invalid input: expected number, received undefined'); - }); - }); - - describe('Command Generation', () => { - it('should generate correct axe command for touch down', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: 
'12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '100', + '60', '-y', - '200', + '40', '--down', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for touch up', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 150, - y: 250, - up: true, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'touch', - '-x', - '150', - '-y', - '250', - '--up', - '--udid', - '12345678-1234-4234-8234-123456789012', - ]); - }); + it('touches up at the referenced element center', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); - it('should generate correct axe command for touch down+up', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 300, - y: 400, - down: true, - up: true, - }, - trackingExecutor, - mockAxeHelpers, 
- ), - ); + await runTouch({ simulatorId, elementRef: 'e1', up: true }, executor); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '300', + '60', '-y', - '400', - '--down', + '40', '--up', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command for touch with delay', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 50, - y: 75, - down: true, - up: true, - delay: 1.5, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + it('touches down and up with delay at the referenced element center', async () => { + recordSnapshot([createNode({ frame: { x: 10, y: 20, width: 100, height: 40 } })]); + const { calls, executor } = createTrackingExecutor(); + + await runTouch({ simulatorId, elementRef: 'e1', down: true, up: true, delay: 1.5 }, executor); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '50', + '60', '-y', - '75', + '40', '--down', '--up', '--delay', '1.5', '--udid', - '12345678-1234-4234-8234-123456789012', + simulatorId, ]); }); - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'touch completed', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = { - getAxePath: () => '/path/to/bundled/axe', - 
getBundledAxeEnvironment: () => ({ AXE_PATH: '/some/path' }), - }; - - await runLogic(() => - touchLogic( - { - simulatorId: 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', - x: 0, - y: 0, - up: true, - delay: 0.5, - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + it('uses the switch activation point for wide switch rows', async () => { + recordSnapshot([ + createNode({ + type: 'Switch', + role: 'AXSwitch', + frame: { x: 42.57, y: 889.68, width: 316.87, height: 26.89 }, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', + await runTouch({ simulatorId, elementRef: 'e1', down: true, up: true }, executor); + + expect(calls[0]?.command).toEqual([ + '/mocked/axe/path', 'touch', '-x', - '0', + '307', '-y', - '0', + '903', + '--down', '--up', - '--delay', - '0.5', '--udid', - 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', + simulatorId, ]); }); }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should handle axe dependency error', async () => { - const mockExecutor = createMockExecutor({ success: true }); - const mockAxeHelpers = { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Resolution failures', () => { + it('keeps down/up validation before snapshot resolution', async () => { + const { calls, executor } = createTrackingExecutor(); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - }); + const result = await runTouch({ simulatorId, elementRef: 'e1' }, executor); - it('should successfully perform touch down', async () => { - const mockExecutor = createMockExecutor({ success: true, output: 'Touch down completed' }); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - 
getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch down) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.error).toBe('At least one of "down" or "up" must be true'); + expect(result.action).toEqual({ type: 'touch', elementRef: 'e1' }); + expect(result.uiError).toBeUndefined(); + expect(calls).toEqual([]); }); - it('should successfully perform touch up', async () => { - const mockExecutor = createMockExecutor({ success: true, output: 'Touch up completed' }); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - up: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch up) at (100, 200) executed successfully.', - ); - }); + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - it('should return error when neither down nor up is specified', async () => { - const mockExecutor = createMockExecutor({ success: true }); - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - }, - mockExecutor, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - expect(result.isError).toBe(true); - expect(allText(result)).toContain('At least one of "down" or "up" must be true'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + 
expect(calls).toEqual([]); }); - it('should return success for touch down event', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'touch completed', - error: undefined, - }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode()], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch down) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should return success for touch up event', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'touch completed', - error: undefined, - }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode()]); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - up: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e404', down: true }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch up) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + 
expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should return success for touch down+up event', async () => { - const mockExecutor = createMockExecutor({ - success: true, - output: 'touch completed', - error: undefined, - }); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ role: 'AXApplication', type: 'Application' })]); + const { calls, executor } = createTrackingExecutor(); - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - up: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTouch({ simulatorId, elementRef: 'e1', down: true }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain( - 'Touch event (touch down+up) at (100, 200) executed successfully.', - ); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle DependencyError when axe is not available', async () => { - const mockExecutor = createMockExecutor({ success: true }); - - const mockAxeHelpers = { - getAxePath: () => null, - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('rejects delay unless both down and up are true before AXe runs', async () => { + const result = await handler({ simulatorId, elementRef: 'e1', down: true, delay: 1 }); expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); - 
}); - - it('should handle AxeError from failed command execution', async () => { - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'axe command failed', - }); - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), + expect(result.content[0].text).toContain( + 'Delay can only be used when both down and up are true', ); - - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('Failed to execute touch event.'); - expect(text).toContain('axe command failed'); }); - it('should handle SystemError from command execution', async () => { - const mockExecutor = async () => { - throw new Error('System error occurred'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), - ); + it('requires simulatorId session default', async () => { + const result = await handler({ elementRef: 'e1', down: true }); expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected Error objects', async () => { - const mockExecutor = async () => { - throw new Error('Unexpected error'); - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - 
mockExecutor, - mockAxeHelpers, - ), - ); - - expect(result.isError).toBe(true); - }); + it('returns ACTION_FAILED when AXe fails after ref resolution', async () => { + recordSnapshot([createNode()]); - it('should handle unexpected string errors', async () => { - const mockExecutor = async () => { - throw 'String error'; - }; - - const mockAxeHelpers = { - getAxePath: () => '/usr/local/bin/axe', - getBundledAxeEnvironment: () => ({}), - }; - - const result = await runLogic(() => - touchLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - x: 100, - y: 200, - down: true, - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTouch( + { simulatorId, elementRef: 'e1', down: true }, + createFailingExecutor('touch failed'), ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/type_text.test.ts b/src/mcp/tools/ui-automation/__tests__/type_text.test.ts index 945ea2aea..a5616e98f 100644 --- a/src/mcp/tools/ui-automation/__tests__/type_text.test.ts +++ b/src/mcp/tools/ui-automation/__tests__/type_text.test.ts @@ -1,481 +1,454 @@ -import { describe, it, expect, beforeEach } from 'vitest'; +import { beforeEach, describe, expect, it } from 'vitest'; import * as z from 'zod'; -import { - createMockExecutor, - createNoopExecutor, - mockProcess, -} from '../../../../test-utils/mock-executors.ts'; +import type { UiActionResultDomainResult } from '../../../../types/domain-results.ts'; import { sessionStore } from '../../../../utils/session-store.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { 
__resetRuntimeSnapshotStoreForTests } from '../shared/snapshot-ui-state.ts'; import { schema, handler, type_textLogic } from '../type_text.ts'; -import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; -import { allText, runLogic } from '../../../../test-utils/test-helpers.ts'; - -// Mock axe helpers for dependency injection -function createMockAxeHelpers( - overrides: { - getAxePathReturn?: string | null; - getBundledAxeEnvironmentReturn?: Record; - } = {}, -) { - return { - getAxePath: () => - overrides.getAxePathReturn !== undefined ? overrides.getAxePathReturn : '/usr/local/bin/axe', - getBundledAxeEnvironment: () => overrides.getBundledAxeEnvironmentReturn ?? {}, - }; -} - -// Mock executor that tracks rejections for testing -function createRejectingExecutor(error: any) { - return async () => { - throw error; - }; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, + createTrackingExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +async function runTypeText( + params: Parameters[0], + executor = createTrackingExecutor().executor, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => type_textLogic(params, executor, createMockAxeHelpers())); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as UiActionResultDomainResult; } describe('Type Text Tool', () => { beforeEach(() => { sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); }); describe('Schema Validation', () => { - it('should have handler function', () => { + it('requires elementRef and text', () => { expect(typeof handler).toBe('function'); - }); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('text'); + expect(schema).toHaveProperty('replaceExisting'); - it('should validate schema fields with safeParse', () => { const schemaObject = z.object(schema); - + expect(schemaObject.safeParse({ elementRef: 'e1', text: 
'Hello World' }).success).toBe(true); expect( - schemaObject.safeParse({ - text: 'Hello World', - }).success, + schemaObject.safeParse({ elementRef: 'e1', text: 'Hello World', replaceExisting: true }) + .success, ).toBe(true); + expect(schemaObject.safeParse({ elementRef: 'e1', text: '' }).success).toBe(false); + expect(schemaObject.safeParse({ text: 'Hello World' }).success).toBe(false); + expect(schemaObject.safeParse({ elementRef: 'e1' }).success).toBe(false); + }); + }); - expect( - schemaObject.safeParse({ - text: '', - }).success, - ).toBe(false); - - expect( - schemaObject.safeParse({ - text: 123, - }).success, - ).toBe(false); + describe('Command Generation', () => { + it('focuses the referenced text field by identifier, then types text', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Email', + AXUniqueId: 'email-field', + }), + ]); + const { calls, executor } = createTrackingExecutor(); - expect(schemaObject.safeParse({}).success).toBe(false); + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'user@example.com' }, + executor, + ); - const withSimId = schemaObject.safeParse({ - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', + expect(result).toMatchObject({ + didError: false, + action: { type: 'type-text', elementRef: 'e1', textLength: 16 }, }); - expect(withSimId.success).toBe(true); - expect('simulatorId' in (withSimId.data as Record)).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--id', + 'email-field', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', 'user@example.com', '--udid', simulatorId], + ]); }); - }); - describe('Handler Requirements', () => { - it('should require simulatorId session default', async () => { - const result = await handler({ text: 'Hello' }); + it('types all AXe-supported US keyboard punctuation characters', async 
() => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + const text = 'Az09 !@#$%^&*()_+-={}[]|\\:";\'<>?,./`~'; - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Missing required session defaults'); - expect(message).toContain('simulatorId is required'); - expect(message).toContain('session-set-defaults'); - }); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text }, executor); - it('should surface validation errors when defaults exist', async () => { - sessionStore.setDefaults({ simulatorId: '12345678-1234-4234-8234-123456789012' }); + expect(result).toMatchObject({ + didError: false, + action: { type: 'type-text', elementRef: 'e1', textLength: text.length }, + }); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--label', + 'Search', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', text, '--udid', simulatorId], + ]); + }); - const result = await handler({}); + it('rejects unsupported AXe typing characters before focusing or typing', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: 'Search', + }), + ]); + const { calls, executor } = createTrackingExecutor(); + const text = 'Tokyo Reykjavík 42'; - expect(result.isError).toBe(true); - const message = result.content[0].text; - expect(message).toContain('Parameter validation failed'); - expect(message).toContain('text: Invalid input: expected string, received undefined'); - }); - }); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text }, executor); - describe('Command Generation', () => { - it('should generate correct axe command for basic text typing', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => 
{ - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + message: expect.stringContaining('US keyboard characters'), + elementRef: 'e1', + recoveryHint: expect.stringContaining('US keyboard'), }); + expect(result.action).toEqual({ + type: 'type-text', + elementRef: 'e1', + textLength: text.length, + }); + expect(calls).toEqual([]); + expect(JSON.stringify(result)).not.toContain('Tokyo'); + expect(JSON.stringify(result)).not.toContain('Reykjavík'); + }); - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - trackingExecutor, - mockAxeHelpers, - ), - ); - - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - 'Hello World', - '--udid', - '12345678-1234-4234-8234-123456789012', + it('includes text field type when focusing a referenced field with a shared identifier', async () => { + recordSnapshot([ + createNode({ + type: 'Group', + role: 'AXGroup', + AXUniqueId: 'locationSearchField', + children: [ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXUniqueId: 'locationSearchField', + AXLabel: 'Search for a city', + }), + ], + }), + ]); + const { calls, executor } = createTrackingExecutor(); + + const result = await runTypeText({ simulatorId, elementRef: 'e2', text: 'London' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--id', + 'locationSearchField', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); }); - it('should generate correct axe command for text with 
special characters', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('focuses by coordinates immediately when the snapshot already has duplicate selector matches', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'locationSearchField', + AXLabel: 'Search', + }), + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 40, y: 200, width: 180, height: 40 }, + AXUniqueId: 'locationSearchField', + AXLabel: 'Search', + }), + ]); + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'user@example.com', - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e2', text: 'London' }, executor); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - 'user@example.com', - '--udid', - '12345678-1234-4234-8234-123456789012', + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + ['/mocked/axe/path', 'tap', '-x', '130', '-y', '220', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); }); - it('should generate correct axe command for text with numbers and symbols', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = 
createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Password123!@#', - }, - trackingExecutor, - mockAxeHelpers, - ), - ); + it('falls back to the resolved center when selector focus is ambiguous', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: 'locationSearchField', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'Multiple 2 accessibility elements matched selector' }, + { success: true, output: 'focused by coordinate' }, + { success: true, output: 'typed' }, + ]); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - 'Password123!@#', - '--udid', - '12345678-1234-4234-8234-123456789012', + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'London' }, executor); + + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--id', + 'locationSearchField', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'London', '--udid', simulatorId], ]); }); - it('should generate correct axe command for long text', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('falls back to the resolved center when selector focus reports no match', async () => { + recordSnapshot([ + createNode({ + 
type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXUniqueId: undefined, + AXIdentifier: undefined, + AXLabel: 'Search for a city', + }), + ]); + const { calls, executor } = createSequencedExecutor([ + { + success: false, + error: "No accessibility element matched --label 'Search for a city'. No tap performed.", + }, + { success: true, output: 'focused by coordinate' }, + { success: true, output: 'typed' }, + ]); - const longText = - 'This is a very long text that needs to be typed into the simulator for testing purposes.'; - - await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: longText, - }, - trackingExecutor, - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Portland' }, + executor, ); - expect(capturedCommand).toEqual([ - '/usr/local/bin/axe', - 'type', - longText, - '--udid', - '12345678-1234-4234-8234-123456789012', + expect(result.didError).toBe(false); + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--label', + 'Search for a city', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', simulatorId], + ['/mocked/axe/path', 'type', 'Portland', '--udid', simulatorId], ]); }); - it('should generate correct axe command with bundled axe path', async () => { - let capturedCommand: string[] = []; - const trackingExecutor = async (command: string[]) => { - capturedCommand = command; - return { - success: true, - output: 'Text typed successfully', - error: undefined, - process: mockProcess, - }; - }; - - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/path/to/bundled/axe', - getBundledAxeEnvironmentReturn: { AXE_PATH: '/some/path' }, - }); + it('selects existing text before typing when replaceExisting is true', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + 
role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXValue: 'Tokyo', + AXLabel: undefined, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - await runLogic(() => - type_textLogic( - { - simulatorId: 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', - text: 'Test message', - }, - trackingExecutor, - mockAxeHelpers, - ), + await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Portland', replaceExisting: true }, + executor, ); - expect(capturedCommand).toEqual([ - '/path/to/bundled/axe', - 'type', - 'Test message', - '--udid', - 'ABCDEF12-3456-7890-ABCD-ABCDEFABCDEF', + expect(calls.map((call) => call.command)).toEqual([ + [ + '/mocked/axe/path', + 'tap', + '--value', + 'Tokyo', + '--element-type', + 'TextField', + '--udid', + simulatorId, + ], + [ + '/mocked/axe/path', + 'key-combo', + '--modifiers', + '227', + '--key', + '4', + '--udid', + simulatorId, + ], + ['/mocked/axe/path', 'type', 'Portland', '--udid', simulatorId], ]); }); - }); - describe('Handler Behavior (Complete Literal Returns)', () => { - it('should handle axe dependency error', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: null, - }); + it('focuses the referenced text field by center when no identifier exists', async () => { + recordSnapshot([ + createNode({ + type: 'TextField', + role: 'AXTextField', + frame: { x: 20, y: 30, width: 200, height: 50 }, + AXLabel: undefined, + }), + ]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - createNoopExecutor(), - mockAxeHelpers, - ), - ); + await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(calls.map((call) => call.command)).toEqual([ + ['/mocked/axe/path', 'tap', '-x', '120', '-y', '55', '--udid', 
simulatorId], + ['/mocked/axe/path', 'type', 'Hello', '--udid', simulatorId], + ]); }); + }); - it('should successfully type text', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - const mockExecutor = createMockExecutor({ - success: true, - output: 'Text typed successfully', - error: undefined, - }); + describe('Resolution failures', () => { + it('returns SNAPSHOT_MISSING without calling AXe', async () => { + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBeFalsy(); - expect(allText(result)).toContain('Text typing simulated successfully.'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_MISSING'); + expect(calls).toEqual([]); }); - it('should return success for valid text typing', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('returns SNAPSHOT_EXPIRED without calling AXe', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })], Date.now() - 61_000); + const { calls, executor } = createTrackingExecutor(); - const mockExecutor = createMockExecutor({ - success: true, - output: 'Text typed successfully', - error: undefined, - }); - - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBeFalsy(); - 
expect(allText(result)).toContain('Text typing simulated successfully.'); + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_EXPIRED'); + expect(calls).toEqual([]); }); - it('should handle DependencyError when axe binary not found', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: null, - }); + it('returns ELEMENT_REF_NOT_FOUND without calling AXe', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - createNoopExecutor(), - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e404', text: 'Hello' }, + executor, ); - expect(result.isError).toBe(true); - expect(allText(result)).toContain(AXE_NOT_AVAILABLE_MESSAGE); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'ELEMENT_REF_NOT_FOUND', elementRef: 'e404' }); + expect(calls).toEqual([]); }); - it('should handle AxeError from command execution', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - const mockExecutor = createMockExecutor({ - success: false, - output: '', - error: 'Text field not found', - }); + it('returns TARGET_NOT_ACTIONABLE without calling AXe', async () => { + recordSnapshot([createNode({ type: 'Button', role: 'AXButton' })]); + const { calls, executor } = createTrackingExecutor(); - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + const result = await runTypeText({ simulatorId, elementRef: 'e1', text: 'Hello' }, executor); - expect(result.isError).toBe(true); - const text = allText(result); - 
expect(text).toContain('Failed to simulate text typing.'); - expect(text).toContain('Text field not found'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_ACTIONABLE', elementRef: 'e1' }); + expect(calls).toEqual([]); }); + }); - it('should handle SystemError from command execution', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); - - const mockExecutor = createRejectingExecutor(new Error('ENOENT: no such file or directory')); - - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), - ); + describe('Handler Behavior', () => { + it('requires simulatorId session default', async () => { + const result = await handler({ elementRef: 'e1', text: 'Hello' }); expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); }); - it('should handle unexpected Error objects', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('returns ACTION_FAILED when focusing the resolved field fails', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + const { calls, executor } = createSequencedExecutor([ + { success: false, error: 'focus failed' }, + ]); - const mockExecutor = createRejectingExecutor(new Error('Unexpected error')); - - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Secret123' }, + executor, ); - expect(result.isError).toBe(true); + 
expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(calls).toHaveLength(1); + expect(JSON.stringify(result)).not.toContain('Secret123'); + expect(result.action).toEqual({ type: 'type-text', elementRef: 'e1', textLength: 9 }); }); - it('should handle unexpected string errors', async () => { - const mockAxeHelpers = createMockAxeHelpers({ - getAxePathReturn: '/usr/local/bin/axe', - getBundledAxeEnvironmentReturn: {}, - }); + it('returns ACTION_FAILED when typing fails after focus succeeds', async () => { + recordSnapshot([createNode({ type: 'TextField', role: 'AXTextField' })]); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: 'focused' }, + { success: false, error: 'typing failed' }, + ]); - const mockExecutor = createRejectingExecutor('String error'); - - const result = await runLogic(() => - type_textLogic( - { - simulatorId: '12345678-1234-4234-8234-123456789012', - text: 'Hello World', - }, - mockExecutor, - mockAxeHelpers, - ), + const result = await runTypeText( + { simulatorId, elementRef: 'e1', text: 'Secret123' }, + executor, ); - expect(result.isError).toBe(true); - const text = allText(result); - expect(text).toContain('System error executing axe command.'); - expect(text).toContain('Failed to execute axe command: String error'); + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'ACTION_FAILED', + elementRef: 'e1', + recoveryHint: expect.stringContaining('snapshot_ui'), + }); + expect(calls).toHaveLength(2); + expect(JSON.stringify(result)).not.toContain('Secret123'); + expect(result.action).toEqual({ type: 'type-text', elementRef: 'e1', textLength: 9 }); }); }); }); diff --git a/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts b/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts new file mode 100644 index 
000000000..b5ab08e07 --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/ui-action-test-helpers.ts @@ -0,0 +1,88 @@ +import type { AccessibilityNode } from '../../../../types/domain-results.ts'; +import type { CommandExecOptions, CommandExecutor } from '../../../../utils/execution/index.ts'; +import { mockProcess } from '../../../../test-utils/mock-executors.ts'; +import type { AxeHelpers } from '../shared/axe-command.ts'; +import { createRuntimeSnapshotRecord } from '../shared/runtime-snapshot.ts'; +import { recordRuntimeSnapshot } from '../shared/snapshot-ui-state.ts'; + +export const simulatorId = '12345678-1234-4234-8234-123456789012'; + +export interface CapturedCommandCall { + command: string[]; + logPrefix?: string; + useShell?: boolean; + opts?: CommandExecOptions; +} + +export function createMockAxeHelpers( + overrides: { + getAxePathReturn?: string | null; + getBundledAxeEnvironmentReturn?: Record; + } = {}, +): AxeHelpers { + return { + getAxePath: () => + overrides.getAxePathReturn !== undefined ? overrides.getAxePathReturn : '/mocked/axe/path', + getBundledAxeEnvironment: () => + overrides.getBundledAxeEnvironmentReturn ?? 
{ SOME_ENV: 'value' }, + }; +} + +export function createTrackingExecutor(): { + calls: CapturedCommandCall[]; + executor: CommandExecutor; +} { + const calls: CapturedCommandCall[] = []; + const executor: CommandExecutor = async (command, logPrefix, useShell, opts) => { + calls.push({ command, logPrefix, useShell, opts }); + return { success: true, output: 'ok', error: undefined, process: mockProcess }; + }; + + return { calls, executor }; +} + +export function createFailingExecutor(error: string): CommandExecutor { + return async () => ({ success: false, output: '', error, process: mockProcess }); +} + +export function createSequencedExecutor( + results: Array<{ success: boolean; output?: string; error?: string }>, +): { + calls: CapturedCommandCall[]; + executor: CommandExecutor; +} { + const calls: CapturedCommandCall[] = []; + let index = 0; + const executor: CommandExecutor = async (command, logPrefix, useShell, opts) => { + calls.push({ command, logPrefix, useShell, opts }); + const result = results[index] ?? results.at(-1) ?? { success: true }; + index += 1; + return { + success: result.success, + output: result.output ?? 
'', + error: result.error, + process: mockProcess, + }; + }; + + return { calls, executor }; +} + +export function createNode(overrides: Partial = {}): AccessibilityNode { + return { + type: 'Button', + role: 'AXButton', + frame: { x: 10, y: 20, width: 100, height: 40 }, + children: [], + enabled: true, + custom_actions: [], + AXLabel: 'Continue', + ...overrides, + }; +} + +export function recordSnapshot(nodes: AccessibilityNode[], capturedAtMs = Date.now()): void { + recordRuntimeSnapshot( + createRuntimeSnapshotRecord({ simulatorId, uiHierarchy: nodes, nowMs: capturedAtMs }), + ); +} diff --git a/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts new file mode 100644 index 000000000..7ec1bedbb --- /dev/null +++ b/src/mcp/tools/ui-automation/__tests__/wait_for_ui.test.ts @@ -0,0 +1,708 @@ +import { beforeEach, describe, expect, it } from 'vitest'; +import * as z from 'zod'; +import type { CaptureResultDomainResult } from '../../../../types/domain-results.ts'; +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { sessionStore } from '../../../../utils/session-store.ts'; +import { createMockToolHandlerContext } from '../../../../test-utils/test-helpers.ts'; +import { + __resetRuntimeSnapshotStoreForTests, + getRuntimeSnapshot, +} from '../shared/snapshot-ui-state.ts'; +import { handler, schema, wait_for_uiLogic } from '../wait_for_ui.ts'; +import { + createMockAxeHelpers, + createNode, + createSequencedExecutor, + recordSnapshot, + simulatorId, +} from './ui-action-test-helpers.ts'; + +function hierarchyJson(nodes: Array>): string { + return JSON.stringify({ elements: nodes }); +} + +function createTiming(startMs = 0): { + timing: { now: () => number; sleep: (durationMs: number) => Promise }; + getNow: () => number; +} { + let nowMs = startMs; + return { + timing: { + now: () => nowMs, + sleep: async (durationMs) => { + nowMs += durationMs; + }, + }, + getNow: () => 
nowMs, + }; +} + +async function runWaitForUi( + params: Parameters[0], + executor: CommandExecutor, + timing = createTiming().timing, +): Promise { + const { ctx, run } = createMockToolHandlerContext(); + await run(() => wait_for_uiLogic(params, executor, createMockAxeHelpers(), undefined, timing)); + expect(ctx.structuredOutput?.schemaVersion).toBe('2'); + return ctx.structuredOutput?.result as CaptureResultDomainResult; +} + +function firstRuntimeLabel(result: CaptureResultDomainResult): string | undefined { + return result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot' + ? result.capture.elements[0]?.label + : undefined; +} + +describe('Wait for UI Plugin', () => { + beforeEach(() => { + sessionStore.clear(); + __resetRuntimeSnapshotStoreForTests(); + }); + + describe('Schema Validation', () => { + it('exposes public selector fields without simulatorId in the public schema', () => { + expect(typeof handler).toBe('function'); + expect(schema).toHaveProperty('predicate'); + expect(schema).toHaveProperty('elementRef'); + expect(schema).toHaveProperty('identifier'); + expect(schema).toHaveProperty('label'); + expect(schema).toHaveProperty('role'); + expect(schema).toHaveProperty('value'); + expect(schema).toHaveProperty('text'); + expect(schema).not.toHaveProperty('simulatorId'); + + const schemaObject = z.object(schema); + expect(schemaObject.safeParse({ predicate: 'settled' }).success).toBe(true); + expect( + schemaObject.safeParse({ predicate: 'exists', identifier: 'continue-button' }).success, + ).toBe(true); + expect( + schemaObject.safeParse({ predicate: 'gone', label: 'Loading', role: 'text' }).success, + ).toBe(true); + expect(schemaObject.safeParse({ predicate: 'textContains', text: 'Ready' }).success).toBe( + true, + ); + }); + + it('requires simulatorId session default before validation', async () => { + const result = await handler({ predicate: 'settled' }); + + expect(result.isError).toBe(true); + 
expect(result.content[0].text).toContain('Missing required session defaults'); + expect(result.content[0].text).toContain('simulatorId is required'); + }); + + it('requires textContains text through handler validation', async () => { + const result = await handler({ + simulatorId, + predicate: 'textContains', + identifier: 'status', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('textContains waits require text'); + }); + + it('rejects whitespace-only text through handler validation', async () => { + const result = await handler({ + simulatorId, + predicate: 'textContains', + identifier: 'status', + text: ' ', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('textContains waits require text'); + }); + + it('rejects text on non-textContains predicates instead of ignoring it', async () => { + const result = await handler({ + simulatorId, + predicate: 'gone', + role: 'text', + text: 'Loading', + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('text is only supported for textContains waits'); + }); + + it('rejects unknown fields instead of silently broadening wait selectors', async () => { + const result = await handler({ + simulatorId, + predicate: 'textContains', + text: 'Portland', + selector: { role: 'button' }, + }); + + expect(result.isError).toBe(true); + expect(result.content[0].text).toContain('Unrecognized key: "selector"'); + }); + }); + + it('uses the resolved simulatorId in next-step params', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + const { result, run } = createMockToolHandlerContext(); + + await run(() => + wait_for_uiLogic( + { simulatorId, predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, + executor, + createMockAxeHelpers(), + undefined, + createTiming().timing, + ), + ); + + expect(result.nextStepParams).toEqual({ + 
snapshot_ui: { simulatorId }, + wait_for_ui: { simulatorId, predicate: 'settled' }, + }); + }); + + it('converts elementRef to identifier before polling', async () => { + recordSnapshot([createNode({ AXUniqueId: 'continue-button', AXLabel: 'Continue' })], 0); + const { calls, executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXUniqueId: 'continue-button', AXLabel: 'Continue now' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.capture).toEqual( + expect.objectContaining({ + type: 'runtime-snapshot', + protocol: 'rs/1', + screenHash: expect.any(String), + seq: 2, + elements: [expect.objectContaining({ ref: 'e1', identifier: 'continue-button' })], + }), + ); + expect(calls[0]?.command).toEqual(['/mocked/axe/path', 'describe-ui', '--udid', simulatorId]); + expect(getRuntimeSnapshot(simulatorId, 0)?.payload).toBe(result.capture); + }); + + it('converts elementRef to label plus role when no identifier exists', async () => { + recordSnapshot([createNode({ AXLabel: 'Continue', AXUniqueId: undefined })], 0); + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([createNode({ AXLabel: 'Continue', AXUniqueId: undefined })]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(firstRuntimeLabel(result)).toBe('Continue'); + }); + + it('converts elementRef to value plus role when no identifier or label exists', async () => { + recordSnapshot( + [ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + title: null, + help: null, + AXValue: 'Email', + AXUniqueId: undefined, + }), + ], + 0, + ); + const { executor } = createSequencedExecutor([ + { + success: true, + output: 
hierarchyJson([ + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + title: null, + help: null, + AXValue: 'Email', + AXUniqueId: undefined, + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('rejects elementRef without a stable identifier, label, or value selector', async () => { + recordSnapshot( + [ + createNode({ + AXLabel: null, + title: null, + help: null, + AXValue: null, + AXUniqueId: undefined, + }), + ], + 0, + ); + const { calls, executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode()]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', elementRef: 'e1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ code: 'TARGET_NOT_FOUND', elementRef: 'e1' }); + expect(calls).toEqual([]); + }); + + it('matches explicit selector fields by exact AND', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Submit', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Submit', role: 'AXButton', type: 'Button' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'enabled', label: 'Submit', role: 'button', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('allows multiple matches for exists', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'exists', label: 'Duplicate', timeoutMs: 0 }, + executor, + ); + + 
expect(result.didError).toBe(false); + }); + + it('succeeds for gone when selector count is zero', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXLabel: 'Ready' })]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'gone', label: 'Loading', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toEqual({ predicate: 'gone', matches: [] }); + }); + + it('returns TARGET_AMBIGUOUS when focused selector matches multiple elements', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + createNode({ AXLabel: 'Duplicate', AXUniqueId: undefined }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'focused', label: 'Duplicate', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_AMBIGUOUS', + candidates: expect.arrayContaining([ + expect.objectContaining({ label: 'Duplicate' }), + expect.objectContaining({ label: 'Duplicate' }), + ]), + }); + }); + + it('returns TARGET_NOT_ACTIONABLE when focused state is unavailable', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ + AXUniqueId: 'email-field', + role: 'AXTextField', + type: 'TextField', + AXLabel: null, + AXValue: 'hello@example.com', + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'focused', identifier: 'email-field', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_NOT_ACTIONABLE', + message: 'The matched runtime UI element does not expose focus state.', + candidates: [expect.objectContaining({ identifier: 'email-field' })], + }); + }); + + it('succeeds 
for focused when the matched element is focused', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ + AXUniqueId: 'email-field', + role: 'AXTextField', + type: 'TextField', + AXLabel: null, + AXValue: 'hello@example.com', + AXFocused: true, + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'focused', identifier: 'email-field', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('times out with latest snapshot and candidates for unresolved enabled state', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([createNode({ AXUniqueId: 'login-button', enabled: false })]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'enabled', identifier: 'login-button', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + timeoutMs: 0, + candidates: [expect.objectContaining({ identifier: 'login-button' })], + }); + expect(result.capture).toEqual(expect.objectContaining({ type: 'runtime-snapshot' })); + expect(getRuntimeSnapshot(simulatorId, 0)?.payload).toBe(result.capture); + }); + + it('includes empty candidates and exact-match guidance for selector timeouts with zero matches', async () => { + const { executor } = createSequencedExecutor([ + { success: true, output: hierarchyJson([createNode({ AXUniqueId: 'other-button' })]) }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'enabled', identifier: 'missing-button', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'WAIT_TIMEOUT', + candidates: [], + recoveryHint: + 'Selector fields match exact values. 
Use textContains for partial visible text, inspect the latest runtime snapshot, or adjust the wait selector.', + }); + expect(result.capture).toEqual(expect.objectContaining({ type: 'runtime-snapshot' })); + }); + + it('checks textContains against normalized case-insensitive value before label', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXUniqueId: 'status', AXLabel: 'Loading', AXValue: 'Server Ready' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { + simulatorId, + predicate: 'textContains', + identifier: 'status', + text: 'server ready', + timeoutMs: 0, + }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('narrows selector matches by text before treating textContains as ambiguous', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Close', role: 'AXButton', type: 'Button' }), + createNode({ + AXLabel: 'Lisbon, Portugal, 9:24 PM · Sunny', + role: 'AXButton', + type: 'Button', + }), + createNode({ AXLabel: 'Clear search', role: 'AXButton', type: 'Button' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', role: 'button', text: 'Lisbon', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('returns TARGET_AMBIGUOUS for textContains when selector plus text still matches multiple elements', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Lisbon saved', role: 'AXButton', type: 'Button' }), + createNode({ AXLabel: 'Lisbon details', role: 'AXButton', type: 'Button' }), + createNode({ AXLabel: 'Lisbon', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', role: 'button', text: 'Lisbon', timeoutMs: 0 }, + 
executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_AMBIGUOUS', + candidates: [ + expect.objectContaining({ label: 'Lisbon saved' }), + expect.objectContaining({ label: 'Lisbon details' }), + ], + }); + }); + + it('supports selector-free textContains when exactly one element matches', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Header' }), + createNode({ AXLabel: 'Light rain is expected around 2 PM.' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'Light rain', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.capture).toEqual(expect.objectContaining({ type: 'runtime-snapshot' })); + expect(result.waitMatch).toMatchObject({ + predicate: 'textContains', + matches: [expect.objectContaining({ label: 'Light rain is expected around 2 PM.' })], + }); + }); + + it('succeeds for selector-free textContains when multiple candidates share matching visible text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'You just pressed the button!' }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + AXValue: 'You just pressed the button!', + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'you just pressed', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + expect(result.waitMatch).toMatchObject({ + predicate: 'textContains', + matches: [ + expect.objectContaining({ label: 'You just pressed the button!' }), + expect.objectContaining({ value: 'You just pressed the button!' 
}), + ], + }); + }); + + it('succeeds for selector textContains when multiple candidates share matching visible text', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Duplicate status', role: 'AXStaticText', type: 'StaticText' }), + createNode({ AXLabel: 'Duplicate status', role: 'AXStaticText', type: 'StaticText' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', role: 'text', text: 'duplicate', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('succeeds for selector-free textContains when multiple candidates exactly match', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Hello from rs1' }), + createNode({ + type: 'TextField', + role: 'AXTextField', + AXLabel: null, + AXValue: 'Hello from rs1', + }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'hello from rs1', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(false); + }); + + it('returns TARGET_AMBIGUOUS for selector-free textContains with mixed partial matches', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Ready' }), + createNode({ AXLabel: 'Ready now' }), + ]), + }, + ]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'textContains', text: 'Ready', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError).toMatchObject({ + code: 'TARGET_AMBIGUOUS', + candidates: [ + expect.objectContaining({ label: 'Ready' }), + expect.objectContaining({ label: 'Ready now' }), + ], + }); + }); + + it('clears the runtime store when every poll returns unparsable UI', async () => { + recordSnapshot([createNode({ AXUniqueId: 'stale-button' })], 0); 
+ const { executor } = createSequencedExecutor([{ success: true, output: 'not json' }]); + + const result = await runWaitForUi( + { simulatorId, predicate: 'settled', timeoutMs: 0 }, + executor, + ); + + expect(result.didError).toBe(true); + expect(result.uiError?.code).toBe('SNAPSHOT_PARSE_FAILED'); + expect(getRuntimeSnapshot(simulatorId)).toBeNull(); + }); + + it('waits until runtime snapshot element signatures remain settled', async () => { + const { executor } = createSequencedExecutor([ + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Loading', frame: { x: 0, y: 0, width: 100, height: 40 } }), + ]), + }, + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Ready', frame: { x: 0, y: 0, width: 100, height: 40 } }), + ]), + }, + { + success: true, + output: hierarchyJson([ + createNode({ AXLabel: 'Ready', frame: { x: 0, y: 0, width: 100, height: 40 } }), + ]), + }, + ]); + const { timing, getNow } = createTiming(); + + const result = await runWaitForUi( + { + simulatorId, + predicate: 'settled', + timeoutMs: 500, + pollIntervalMs: 100, + settledDurationMs: 100, + }, + executor, + timing, + ); + + expect(result.didError).toBe(false); + expect(getNow()).toBe(200); + expect(firstRuntimeLabel(result)).toBe('Ready'); + }); +}); diff --git a/src/mcp/tools/ui-automation/batch.ts b/src/mcp/tools/ui-automation/batch.ts new file mode 100644 index 000000000..0f5de5ff2 --- /dev/null +++ b/src/mcp/tools/ui-automation/batch.ts @@ -0,0 +1,142 @@ +import * as z from 'zod'; +import { log } from '../../../utils/logging/index.ts'; +import type { CommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultCommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultDebuggerManager } from '../../../utils/debugger/index.ts'; +import type { DebuggerManager } from '../../../utils/debugger/debugger-manager.ts'; +import { guardUiAutomationAgainstStoppedDebugger } from 
'../../../utils/debugger/ui-automation-guard.ts'; +import { + createSessionAwareTool, + getSessionAwareToolSchemaShape, + getHandlerContext, + toInternalSchema, +} from '../../../utils/typed-tool-factory.ts'; +import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; +import type { AxeHelpers } from './shared/axe-command.ts'; +import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; +import { + createUiActionFailureResult, + createUiActionSuccessResult, + mapAxeCommandError, + setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, +} from './shared/domain-result.ts'; + +const batchSchema = z.object({ + simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + steps: z + .array(z.string().min(1, { message: 'steps must not contain empty values' })) + .min(1, { message: 'At least one batch step is required' }) + .max(100, { message: 'At most 100 batch steps are supported' }), + axCache: z.enum(['perBatch', 'perStep', 'none']).optional(), + typeSubmission: z.enum(['chunked', 'composite']).optional(), + typeChunkSize: z.number().int().min(1).optional(), + tapStyle: z.enum(['automatic', 'simulator', 'physical']).optional(), + continueOnError: z.boolean().optional(), + waitTimeout: z.number().min(0, { message: 'waitTimeout must be non-negative' }).optional(), + pollInterval: z.number().positive({ message: 'pollInterval must be greater than 0' }).optional(), +}); + +type BatchParams = z.infer; +type BatchResult = UiActionResultDomainResult; + +const LOG_PREFIX = '[AXe]'; + +function buildBatchCommandArgs(params: BatchParams): string[] { + const commandArgs = ['batch']; + for (const step of params.steps) { + commandArgs.push('--step', step); + } + if (params.axCache !== undefined) { + commandArgs.push('--ax-cache', params.axCache); + } + if 
(params.typeSubmission !== undefined) { + commandArgs.push('--type-submission', params.typeSubmission); + } + if (params.typeChunkSize !== undefined) { + commandArgs.push('--type-chunk-size', String(params.typeChunkSize)); + } + if (params.tapStyle !== undefined) { + commandArgs.push('--tap-style', params.tapStyle); + } + if (params.continueOnError === true) { + commandArgs.push('--continue-on-error'); + } + if (params.waitTimeout !== undefined) { + commandArgs.push('--wait-timeout', String(params.waitTimeout)); + } + if (params.pollInterval !== undefined) { + commandArgs.push('--poll-interval', String(params.pollInterval)); + } + return commandArgs; +} + +export function createBatchExecutor( + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): NonStreamingExecutor { + return async (params) => { + const toolName = 'batch'; + const { simulatorId, steps } = params; + const action = { type: 'batch' as const, stepCount: steps.length }; + + const guard = await guardUiAutomationAgainstStoppedDebugger({ + debugger: debuggerManager, + simulatorId, + toolName, + }); + if (guard.blockedMessage) { + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); + } + + const commandArgs = buildBatchCommandArgs(params); + log('info', `${LOG_PREFIX}/${toolName}: Starting ${steps.length} step batch on ${simulatorId}`); + + try { + await executeAxeCommand(commandArgs, simulatorId, 'batch', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); + log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); + } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => `Failed to execute AXe batch with ${steps.length} steps.`, + }); + log('error', 
`${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); + return createUiActionFailureResult(action, simulatorId, failure.message); + } + }; +} + +export async function batchLogic( + params: BatchParams, + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), +): Promise { + const ctx = getHandlerContext(); + const executeBatch = createBatchExecutor(executor, axeHelpers, debuggerManager); + const result = await executeBatch(params); + + setUiActionStructuredOutput(ctx, result); +} + +const publicSchemaObject = z.strictObject(batchSchema.omit({ simulatorId: true } as const).shape); + +export const schema = getSessionAwareToolSchemaShape({ + sessionAware: publicSchemaObject, + legacy: batchSchema, +}); + +export const handler = createSessionAwareTool({ + internalSchema: toInternalSchema(batchSchema), + logicFunction: (params: BatchParams, executor: CommandExecutor) => + batchLogic(params, executor, defaultAxeHelpers), + getExecutor: getDefaultCommandExecutor, + requirements: [{ allOf: ['simulatorId'], message: 'simulatorId is required' }], +}); diff --git a/src/mcp/tools/ui-automation/button.ts b/src/mcp/tools/ui-automation/button.ts index 82ca07bbd..280ca0b07 100644 --- a/src/mcp/tools/ui-automation/button.ts +++ b/src/mcp/tools/ui-automation/button.ts @@ -12,6 +12,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -20,6 +21,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from 
'./shared/domain-result.ts'; const buttonSchema = z.object({ @@ -29,7 +31,8 @@ const buttonSchema = z.object({ .describe('apple-pay|home|lock|side-button|siri'), duration: z .number() - .min(0, { message: 'Duration must be non-negative' }) + .positive({ message: 'Duration must be greater than 0 seconds' }) + .max(10, { message: 'Duration must be at most 10 seconds' }) .optional() .describe('seconds'), }); @@ -38,11 +41,19 @@ type ButtonParams = z.infer; type ButtonResult = UiActionResultDomainResult; const LOG_PREFIX = '[AXe]'; +const DEFAULT_BUTTON_SETTLE_DELAY_MS = 750; + +function delayMs(durationMs: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, durationMs); + }); +} export function createButtonExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + settleDelayMs = DEFAULT_BUTTON_SETTLE_DELAY_MS, ): NonStreamingExecutor { return async (params) => { const toolName = 'button'; @@ -67,9 +78,16 @@ export function createButtonExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'button', executor, axeHelpers); + if (settleDelayMs > 0) { + await delayMs(settleDelayMs); + } + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => `Failed to press button '${buttonType}'.`, }); @@ -86,9 +104,10 @@ export async function buttonLogic( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + settleDelayMs = DEFAULT_BUTTON_SETTLE_DELAY_MS, ): Promise { const ctx = getHandlerContext(); - const executeButton = createButtonExecutor(executor, axeHelpers, 
debuggerManager); + const executeButton = createButtonExecutor(executor, axeHelpers, debuggerManager, settleDelayMs); const result = await executeButton(params); setUiActionStructuredOutput(ctx, result); diff --git a/src/mcp/tools/ui-automation/gesture.ts b/src/mcp/tools/ui-automation/gesture.ts index 447074cb3..01a6848d0 100644 --- a/src/mcp/tools/ui-automation/gesture.ts +++ b/src/mcp/tools/ui-automation/gesture.ts @@ -19,6 +19,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -27,6 +28,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const gestureSchema = z.object({ @@ -49,6 +51,7 @@ const gestureSchema = z.object({ .number() .int() .min(1) + .max(2000) .optional() .describe( 'Screen width in pixels. Used for gesture calculations. Auto-detected if not provided.', @@ -57,28 +60,33 @@ const gestureSchema = z.object({ .number() .int() .min(1) + .max(3000) .optional() .describe( 'Screen height in pixels. Used for gesture calculations. 
Auto-detected if not provided.', ), duration: z .number() - .min(0, { message: 'Duration must be non-negative' }) + .positive({ message: 'Duration must be greater than 0 seconds' }) + .max(10, { message: 'Duration must be at most 10 seconds' }) .optional() .describe('Duration of the gesture in seconds.'), delta: z .number() - .min(0, { message: 'Delta must be non-negative' }) + .positive({ message: 'Delta must be greater than 0' }) + .max(200, { message: 'Delta must be at most 200' }) .optional() .describe('Distance to move in pixels.'), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) .optional() .describe('Delay before starting the gesture in seconds.'), postDelay: z .number() .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) .optional() .describe('Delay after completing the gesture in seconds.'), }); @@ -132,9 +140,13 @@ export function createGestureExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'gesture', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => `Failed to execute gesture '${preset}'.`, }); diff --git a/src/mcp/tools/ui-automation/key_press.ts b/src/mcp/tools/ui-automation/key_press.ts index 7c8afc647..b851d198a 100644 --- a/src/mcp/tools/ui-automation/key_press.ts +++ b/src/mcp/tools/ui-automation/key_press.ts @@ -12,6 +12,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from 
'./shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -20,6 +21,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const keyPressSchema = z.object({ @@ -29,10 +31,11 @@ const keyPressSchema = z.object({ .int({ message: 'HID keycode to press (0-255)' }) .min(0) .max(255) - .describe('HID keycode'), + .describe('HID keycode. Common values: 40 Return/Enter, 42 Backspace, 43 Tab, 44 Space.'), duration: z .number() - .min(0, { message: 'Duration must be non-negative' }) + .positive({ message: 'Duration must be greater than 0 seconds' }) + .max(10, { message: 'Duration must be at most 10 seconds' }) .optional() .describe('seconds'), }); @@ -70,9 +73,13 @@ export function createKeyPressExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'key', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => `Failed to simulate key press (code: ${keyCode}).`, }); diff --git a/src/mcp/tools/ui-automation/key_sequence.ts b/src/mcp/tools/ui-automation/key_sequence.ts index 95cafe611..998ad7f56 100644 --- a/src/mcp/tools/ui-automation/key_sequence.ts +++ b/src/mcp/tools/ui-automation/key_sequence.ts @@ -18,6 +18,7 @@ import { toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot } from 
'./shared/snapshot-ui-state.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; @@ -26,6 +27,7 @@ import { createUiActionSuccessResult, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const keySequenceSchema = z.object({ @@ -33,8 +35,13 @@ const keySequenceSchema = z.object({ keyCodes: z .array(z.number().int().min(0).max(255)) .min(1, { message: 'At least one key code required' }) - .describe('HID keycodes'), - delay: z.number().min(0, { message: 'Delay must be non-negative' }).optional(), + .max(100, { message: 'At most 100 key codes are supported' }) + .describe('HID keycodes. Common values: 40 Return/Enter, 42 Backspace, 43 Tab, 44 Space.'), + delay: z + .number() + .min(0, { message: 'Delay must be non-negative' }) + .max(5, { message: 'Delay must be at most 5 seconds' }) + .optional(), }); type KeySequenceParams = z.infer; @@ -73,9 +80,13 @@ export function createKeySequenceExecutor( try { await executeAxeCommand(commandArgs, simulatorId, 'key-sequence', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => 'Failed to execute key sequence.', }); diff --git a/src/mcp/tools/ui-automation/long_press.ts b/src/mcp/tools/ui-automation/long_press.ts index 4a202066d..38953c6e9 100644 --- a/src/mcp/tools/ui-automation/long_press.ts +++ b/src/mcp/tools/ui-automation/long_press.ts @@ -1,8 +1,7 @@ /** * UI Testing Plugin: Long Press * - * Long press at specific coordinates 
for given duration (ms). - * Use snapshot_ui for precise coordinates (don't guess from screenshots). + * Long presses a semantic UI element from the runtime snapshot store. */ import * as z from 'zod'; @@ -18,7 +17,8 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementActivationPoint } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -26,17 +26,19 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const longPressSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z.number().int({ message: 'X coordinate for the long press' }), - y: z.number().int({ message: 'Y coordinate for the long press' }), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), duration: z .number() - .positive({ message: 'Duration of the long press in milliseconds' }) + .positive({ message: 'Duration must be greater than 0 milliseconds' }) + .max(10_000, { message: 'Duration must be at most 10000 milliseconds' }) .describe('milliseconds'), }); @@ -56,8 +58,15 @@ export function createLongPressExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'long_press'; - const { simulatorId, x, y, duration } = params; - const action = { type: 'long-press' as const, x, y, durationMs: duration }; + const { simulatorId, 
elementRef, duration } = params; + const action = { type: 'long-press' as const, elementRef, durationMs: duration }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'longPress'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, @@ -68,13 +77,14 @@ export function createLongPressExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const delayInSeconds = Number(duration) / 1000; + const center = getRuntimeElementActivationPoint(resolution.element); + const delayInSeconds = duration / 1000; const commandArgs = [ 'touch', '-x', - String(x), + String(center.x), '-y', - String(y), + String(center.y), '--down', '--up', '--delay', @@ -83,23 +93,29 @@ export function createLongPressExecutor( log( 'info', - `${LOG_PREFIX}/${toolName}: Starting for (${x}, ${y}), ${duration}ms on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting for elementRef ${elementRef}, ${duration}ms on ${simulatorId}`, ); try { await executeAxeCommand(commandArgs, simulatorId, 'touch', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [ - guard.warningText, - getSnapshotUiWarning(simulatorId), - ]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => `Failed to simulate long press at (${x}, ${y}).`, + axeFailureMessage: () => `Failed to simulate long press on elementRef ${elementRef}.`, }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); return createUiActionFailureResult(action, 
simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } }; diff --git a/src/mcp/tools/ui-automation/screenshot.ts b/src/mcp/tools/ui-automation/screenshot.ts index 426b5663d..945c269f9 100644 --- a/src/mcp/tools/ui-automation/screenshot.ts +++ b/src/mcp/tools/ui-automation/screenshot.ts @@ -60,6 +60,44 @@ interface SimctlDeviceList { devices: Record; } +async function getSimulatorDeviceForSimulatorId( + simulatorId: string, + executor: CommandExecutor, +): Promise { + const listCommand = ['xcrun', 'simctl', 'list', 'devices', '-j']; + const result = await executor(listCommand, `${LOG_PREFIX}: list devices`, false); + + if (!result.success || !result.output) { + return null; + } + + const data = JSON.parse(result.output) as SimctlDeviceList; + for (const devices of Object.values(data.devices)) { + const match = devices.find((device) => device.udid === simulatorId); + if (match) { + return match; + } + } + + return null; +} + +async function assertSimulatorBooted( + simulatorId: string, + executor: CommandExecutor, +): Promise { + const device = await getSimulatorDeviceForSimulatorId(simulatorId, executor); + if (!device) { + throw new SystemError(`Simulator ${simulatorId} was not found.`); + } + if (device.state !== 'Booted') { + throw new SystemError( + `Simulator ${simulatorId} is ${device.state ?? 'not booted'}. 
Boot the simulator and try again.`, + ); + } + return device; +} + function escapeSwiftStringLiteral(value: string): string { return value .replace(/\\/g, '\\\\') @@ -96,21 +134,10 @@ export async function getDeviceNameForSimulatorId( executor: CommandExecutor, ): Promise { try { - const listCommand = ['xcrun', 'simctl', 'list', 'devices', '-j']; - const result = await executor(listCommand, `${LOG_PREFIX}: list devices`, false); - - if (result.success && result.output) { - const data = JSON.parse(result.output) as SimctlDeviceList; - const devices = data.devices; - - for (const runtime of Object.keys(devices)) { - for (const device of devices[runtime]) { - if (device.udid === simulatorId) { - log('info', `${LOG_PREFIX}: Found device name "${device.name}" for ${simulatorId}`); - return device.name; - } - } - } + const device = await getSimulatorDeviceForSimulatorId(simulatorId, executor); + if (device) { + log('info', `${LOG_PREFIX}: Found device name "${device.name}" for ${simulatorId}`); + return device.name; } log('warn', `${LOG_PREFIX}: Could not find device name for ${simulatorId}`); return null; @@ -219,6 +246,7 @@ export function createScreenshotExecutor( ); try { + const simulatorDevice = await assertSimulatorBooted(simulatorId, executor); const result = await executor(commandArgs, `${LOG_PREFIX}: screenshot`, false); if (!result.success) { @@ -228,8 +256,7 @@ export function createScreenshotExecutor( log('info', `${LOG_PREFIX}/screenshot: Success for ${simulatorId}`); try { - const deviceName = await getDeviceNameForSimulatorId(simulatorId, executor); - const isLandscape = await detectLandscapeMode(executor, deviceName ?? 
undefined); + const isLandscape = await detectLandscapeMode(executor, simulatorDevice.name); if (isLandscape) { log('info', `${LOG_PREFIX}/screenshot: Landscape mode detected, rotating +90`); const rotated = await rotateImage(screenshotPath, 90, executor); diff --git a/src/mcp/tools/ui-automation/shared/domain-result.ts b/src/mcp/tools/ui-automation/shared/domain-result.ts index 6b91c6a6c..b2168dfbc 100644 --- a/src/mcp/tools/ui-automation/shared/domain-result.ts +++ b/src/mcp/tools/ui-automation/shared/domain-result.ts @@ -1,4 +1,4 @@ -import type { ToolHandlerContext } from '../../../../rendering/types.ts'; +import type { RenderHints, ToolHandlerContext } from '../../../../rendering/types.ts'; import type { BasicDiagnostics, CapturePayload, @@ -6,12 +6,19 @@ import type { UiAction, UiActionResultDomainResult, } from '../../../../types/domain-results.ts'; +import type { + UiAutomationRecoverableError, + UiAutomationRecoverableErrorCode, + UiWaitMatch, +} from '../../../../types/ui-snapshot.ts'; import { AXE_NOT_AVAILABLE_MESSAGE } from '../../../../utils/axe-helpers.ts'; import { createBasicDiagnostics } from '../../../../utils/diagnostics.ts'; import { AxeError, DependencyError, SystemError } from '../../../../utils/errors.ts'; const UI_ACTION_SCHEMA = 'xcodebuildmcp.output.ui-action-result'; const CAPTURE_SCHEMA = 'xcodebuildmcp.output.capture-result'; +const REFRESH_SNAPSHOT_RECOVERY_HINT = + 'Run snapshot_ui again and retry with a current element reference from the refreshed snapshot.'; function createDiagnostics( warnings: readonly string[] = [], @@ -28,10 +35,25 @@ function compact(values: Array): string[] { return values.filter((value): value is string => typeof value === 'string' && value.length > 0); } +export function createUiAutomationRecoverableError(params: { + code: UiAutomationRecoverableErrorCode; + message: string; + recoveryHint?: string; + elementRef?: string; +}): UiAutomationRecoverableError { + return { + code: params.code, + message: 
params.message, + recoveryHint: params.recoveryHint ?? REFRESH_SNAPSHOT_RECOVERY_HINT, + ...(params.elementRef ? { elementRef: params.elementRef } : {}), + }; +} + export function createUiActionSuccessResult( action: UiAction, simulatorId: string, warnings: Array = [], + options: { uiError?: UiAutomationRecoverableError } = {}, ): UiActionResultDomainResult { return { kind: 'ui-action-result', @@ -41,6 +63,7 @@ export function createUiActionSuccessResult( action, artifacts: { simulatorId }, diagnostics: createDiagnostics(compact(warnings), []), + ...(options.uiError ? { uiError: options.uiError } : {}), }; } @@ -51,6 +74,7 @@ export function createUiActionFailureResult( options: { warnings?: Array; details?: Array; + uiError?: UiAutomationRecoverableError; } = {}, ): UiActionResultDomainResult { return { @@ -61,6 +85,7 @@ export function createUiActionFailureResult( action, artifacts: { simulatorId }, diagnostics: createDiagnostics(compact(options.warnings ?? []), compact(options.details ?? [])), + ...(options.uiError ? { uiError: options.uiError } : {}), }; } @@ -70,6 +95,8 @@ export function createCaptureSuccessResult( screenshotPath?: string; capture?: CapturePayload; warnings?: Array; + uiError?: UiAutomationRecoverableError; + waitMatch?: UiWaitMatch; } = {}, ): CaptureResultDomainResult { return { @@ -83,6 +110,8 @@ export function createCaptureSuccessResult( }, ...(options.capture ? { capture: options.capture } : {}), diagnostics: createDiagnostics(compact(options.warnings ?? []), []), + ...(options.uiError ? { uiError: options.uiError } : {}), + ...(options.waitMatch ? 
{ waitMatch: options.waitMatch } : {}), }; } @@ -91,8 +120,10 @@ export function createCaptureFailureResult( message: string, options: { screenshotPath?: string; + capture?: CapturePayload; warnings?: Array; details?: Array; + uiError?: UiAutomationRecoverableError; } = {}, ): CaptureResultDomainResult { return { @@ -104,7 +135,9 @@ export function createCaptureFailureResult( simulatorId, ...(options.screenshotPath ? { screenshotPath: options.screenshotPath } : {}), }, + ...(options.capture ? { capture: options.capture } : {}), diagnostics: createDiagnostics(compact(options.warnings ?? []), compact(options.details ?? [])), + ...(options.uiError ? { uiError: options.uiError } : {}), }; } @@ -115,6 +148,10 @@ interface AxeErrorMessages { unexpectedFailureMessage?: (message: string) => string; } +export function shouldInvalidateRuntimeSnapshotAfterActionError(error: unknown): boolean { + return error instanceof AxeError; +} + export function mapAxeCommandError( error: unknown, messages: AxeErrorMessages, @@ -129,7 +166,7 @@ export function mapAxeCommandError( if (error instanceof AxeError) { return { message: messages.axeFailureMessage(error), - diagnostics: createDiagnostics([], compact([error.axeOutput || error.message])), + diagnostics: createDiagnostics([], compact([error.axeOutput ?? error.message])), }; } @@ -154,17 +191,19 @@ export function setUiActionStructuredOutput( ctx.structuredOutput = { result, schema: UI_ACTION_SCHEMA, - schemaVersion: '1', + schemaVersion: '2', }; } export function setCaptureStructuredOutput( ctx: ToolHandlerContext, result: CaptureResultDomainResult, + renderHints?: RenderHints, ): void { ctx.structuredOutput = { result, schema: CAPTURE_SCHEMA, - schemaVersion: '1', + schemaVersion: '2', + ...(renderHints ? 
{ renderHints } : {}), }; } diff --git a/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts new file mode 100644 index 000000000..4adba639e --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/runtime-snapshot.ts @@ -0,0 +1,701 @@ +import type { AccessibilityNode, Frame, Point } from '../../../../types/domain-results.ts'; +import type { + RuntimeActionHintV1, + RuntimeActionNameV1, + RuntimeElementRoleV1, + RuntimeElementStateV1, + RuntimeElementV1, + RuntimeSnapshotElementRecord, + RuntimeSnapshotRecord, + RuntimeSnapshotV1, +} from '../../../../types/ui-snapshot.ts'; + +export const RUNTIME_SNAPSHOT_PROTOCOL = 'rs/1' as const; +export const RUNTIME_SNAPSHOT_TTL_MS = 60_000; + +interface NormalizedNodeInput { + node: AccessibilityNode; + path: string; + depth: number; +} + +export class RuntimeSnapshotParseError extends Error { + constructor(message: string) { + super(message); + this.name = 'RuntimeSnapshotParseError'; + } +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + +function normalizeText(value: unknown): string | undefined { + if (typeof value !== 'string' && typeof value !== 'number' && typeof value !== 'boolean') { + return undefined; + } + + const normalized = String(value).replace(/\s+/g, ' ').trim(); + return normalized.length > 0 ? 
normalized : undefined; +} + +function readText(node: AccessibilityNode, keys: readonly string[]): string | undefined { + for (const key of keys) { + const value = normalizeText(node[key]); + if (value) { + return value; + } + } + return undefined; +} + +function isFiniteNumber(value: unknown): value is number { + return typeof value === 'number' && Number.isFinite(value); +} + +function normalizeFrame(frame: Frame): Frame { + return { + x: Number(frame.x.toFixed(2)), + y: Number(frame.y.toFixed(2)), + width: Number(frame.width.toFixed(2)), + height: Number(frame.height.toFixed(2)), + }; +} + +function readFrameObject(value: unknown): Frame | null { + if (!isRecord(value)) { + return null; + } + + const { x, y, width, height } = value; + if ( + !isFiniteNumber(x) || + !isFiniteNumber(y) || + !isFiniteNumber(width) || + !isFiniteNumber(height) + ) { + return null; + } + + return normalizeFrame({ x, y, width, height }); +} + +function parseAxFrame(value: unknown): Frame | null { + if (typeof value !== 'string') { + return null; + } + + const numbers = value.match(/-?\d+(?:\.\d+)?/g)?.map(Number) ?? []; + if (numbers.length < 4 || numbers.some((entry) => !Number.isFinite(entry))) { + return null; + } + + const [x = 0, y = 0, width = 0, height = 0] = numbers; + return normalizeFrame({ x, y, width, height }); +} + +function readFrame(node: AccessibilityNode): Frame { + return ( + readFrameObject(node.frame) ?? parseAxFrame(node.AXFrame) ?? 
{ x: 0, y: 0, width: 0, height: 0 } + ); +} + +function deriveRole(node: AccessibilityNode): RuntimeElementRoleV1 | undefined { + const roleText = [node.role, node.type, node.subrole, node.role_description] + .map((value) => normalizeText(value)?.toLowerCase()) + .filter((value): value is string => value !== undefined) + .join(' '); + + if (roleText.length === 0) return undefined; + if (/application/.test(roleText)) return 'application'; + if (/window/.test(roleText)) return 'window'; + if (/button/.test(roleText)) return 'button'; + if (/keyboard|key/.test(roleText)) return 'keyboard-key'; + if ( + /textfield|text field|searchfield|search field|securetext|textarea|combo box/.test(roleText) + ) { + return 'text-field'; + } + if (/statictext|text/.test(roleText)) return 'text'; + if (/image/.test(roleText)) return 'image'; + if (/switch|checkbox|check box/.test(roleText)) return 'switch'; + if (/slider/.test(roleText)) return 'slider'; + if (/tab(?!le)/.test(roleText)) return 'tab'; // negative lookahead: "table" (and "-table" words such as "selectable") must not be classified as a tab, otherwise the list check below is unreachable for tables + if (/cell|row/.test(roleText)) return 'cell'; + if (/scroll/.test(roleText)) return 'scroll-view'; + if (/table|list|outline|collection/.test(roleText)) return 'list'; + if (/menu/.test(roleText)) return 'menu'; + return 'other'; +} + +function isVisible(frame: Frame): boolean { + return frame.width > 0 && frame.height > 0; +} + +function framesIntersect(a: Frame, b: Frame): boolean { + return a.x < b.x + b.width && a.x + a.width > b.x && a.y < b.y + b.height && a.y + a.height > b.y; +} + +function pointInsideFrame(point: Point, frame: Frame): boolean { + return ( + point.x >= frame.x && + point.x <= frame.x + frame.width && + point.y >= frame.y && + point.y <= frame.y + frame.height + ); +} + +function hasPointAction(actions: readonly RuntimeActionNameV1[]): boolean { + return actions.some( + (action) => + action === 'tap' || action === 'typeText' || action === 'longPress' || action === 'touch', + ); +} + +function isTapRole(role: RuntimeElementRoleV1 | undefined): boolean { + return ( +
role === 'button' || + role === 'cell' || + role === 'keyboard-key' || + role === 'switch' || + role === 'tab' || + role === 'text-field' + ); +} + +function isGenericInternalIdentifier(identifier: string | undefined): boolean { + return identifier === 'label-view'; +} + +function deriveActions(params: { + role: RuntimeElementRoleV1 | undefined; + enabled: boolean; + frame: Frame; + customActions: readonly string[]; + hasSemanticIdentity: boolean; +}): RuntimeActionNameV1[] { + const { role, enabled, frame, customActions, hasSemanticIdentity } = params; + if (!enabled || !isVisible(frame)) { + return []; + } + + const actions = new Set(); + if (isTapRole(role) || (customActions.length > 0 && hasSemanticIdentity)) { + actions.add('tap'); + } + if (role === 'text-field') { + actions.add('typeText'); + } + if (role !== 'application' && role !== 'window') { + actions.add('longPress'); + actions.add('touch'); + } + if (role === 'scroll-view' || role === 'list' || role === 'cell') { + actions.add('swipeWithin'); + } + + return [...actions]; +} + +function hashString(input: string): string { + let hash = 0x811c9dc5; + for (let index = 0; index < input.length; index += 1) { + hash ^= input.charCodeAt(index); + hash = Math.imul(hash, 0x01000193) >>> 0; + } + return hash.toString(36).padStart(7, '0'); +} + +function readChildren(node: AccessibilityNode): AccessibilityNode[] { + return Array.isArray(node.children) ? 
node.children : []; +} + +function normalizeCustomActions(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return value.map(normalizeText).filter((entry): entry is string => entry !== undefined); +} + +function readState(node: AccessibilityNode, frame: Frame): RuntimeElementStateV1 | undefined { + const state: RuntimeElementStateV1 = { + enabled: node.enabled !== false, + visible: isVisible(frame), + }; + + if (typeof node.focused === 'boolean') { + state.focused = node.focused; + } else if (typeof node.AXFocused === 'boolean') { + state.focused = node.AXFocused; + } + + if (typeof node.selected === 'boolean') { + state.selected = node.selected; + } else if (typeof node.AXSelected === 'boolean') { + state.selected = node.AXSelected; + } + + return Object.keys(state).length > 0 ? state : undefined; +} + +function stableSignature(params: { + role?: RuntimeElementRoleV1; + label?: string; + value?: string; + identifier?: string; + path: string; + frame: Frame; +}): string { + return hashString(JSON.stringify(params)); +} + +function normalizeNode(input: NormalizedNodeInput, index: number): RuntimeSnapshotElementRecord { + const { node, path, depth } = input; + const ref = `e${index + 1}`; + const frame = readFrame(node); + const role = deriveRole(node); + const label = readText(node, ['AXLabel', 'title', 'help', 'label']); + const value = readText(node, ['AXValue', 'value']); + const identifier = readText(node, ['AXUniqueId', 'identifier', 'id']); + const enabled = node.enabled !== false; + const customActions = normalizeCustomActions(node.custom_actions); + const actions = deriveActions({ + role, + enabled, + frame, + customActions, + hasSemanticIdentity: + label !== undefined || + value !== undefined || + (identifier !== undefined && !isGenericInternalIdentifier(identifier)), + }); + const state = readState(node, frame); + + return { + publicElement: { + ref, + ...(role ? { role } : {}), + ...(label ? { label } : {}), + ...(value ? 
{ value } : {}), + ...(identifier ? { identifier } : {}), + frame, + ...(state ? { state } : {}), + actions, + }, + metadata: { + path, + depth, + childCount: readChildren(node).length, + signature: stableSignature({ role, label, value, identifier, path, frame }), + }, + rawNode: node, + }; +} + +function isContainerRole(role: RuntimeElementRoleV1 | undefined): boolean { + return ( + role === 'application' || + role === 'window' || + role === 'scroll-view' || + role === 'list' || + role === 'other' + ); +} + +function isDescendantPath(parentPath: string, candidatePath: string): boolean { + return candidatePath.startsWith(`${parentPath}.`); +} + +function isLargeEnoughInferredScrollContainer( + role: RuntimeElementRoleV1 | undefined, + frame: Frame, +): boolean { + if (role !== 'other') { + return true; + } + return frame.width >= 120 && frame.height >= 120; +} + +function frameOverflowsContainer(frame: Frame, containerFrame: Frame): boolean { + const tolerance = 8; + return ( + frame.x < containerFrame.x - tolerance || + frame.y < containerFrame.y - tolerance || + frame.x + frame.width > containerFrame.x + containerFrame.width + tolerance || + frame.y + frame.height > containerFrame.y + containerFrame.height + tolerance + ); +} + +function isSheetGrabberElement(element: RuntimeSnapshotElementRecord): boolean { + return element.publicElement.label?.toLowerCase() === 'sheet grabber'; +} + +function findSheetGrabberDescendant( + element: RuntimeSnapshotElementRecord, + elements: RuntimeSnapshotElementRecord[], +): RuntimeSnapshotElementRecord | null { + return ( + elements.find( + (candidate) => + candidate !== element && + isDescendantPath(element.metadata.path, candidate.metadata.path) && + isSheetGrabberElement(candidate), + ) ?? 
null + ); +} + +function createSheetSwipeFrame(containerFrame: Frame, grabberFrame: Frame): Frame { + const top = Math.round( + Math.max( + grabberFrame.y + grabberFrame.height + 120, + containerFrame.y + containerFrame.height * 0.35, + ), + ); + const bottom = Math.round(containerFrame.y + containerFrame.height * 0.85); + const height = Math.max(2, bottom - top); + return normalizeFrame({ + x: containerFrame.x, + y: Math.min(top, bottom - 2), + width: containerFrame.width, + height, + }); +} + +function findViewportFrame(elements: RuntimeSnapshotElementRecord[]): Frame | null { + return ( + elements.find( + (element) => + (element.publicElement.role === 'application' || element.publicElement.role === 'window') && + isVisible(element.publicElement.frame), + )?.publicElement.frame ?? null + ); +} + +function applyViewportVisibility(elements: RuntimeSnapshotElementRecord[]): void { + const viewport = findViewportFrame(elements); + if (!viewport) { + return; + } + + for (const element of elements) { + const publicElement = element.publicElement; + if (publicElement.role === 'application' || publicElement.role === 'window') { + continue; + } + + if (!framesIntersect(publicElement.frame, viewport)) { + publicElement.state = { ...publicElement.state, visible: false }; + publicElement.actions = []; + continue; + } + + const activationPoint = getDefaultRuntimeElementActivationPoint(element); + if (!pointInsideFrame(activationPoint, viewport)) { + publicElement.actions = publicElement.actions.filter((action) => action === 'swipeWithin'); + continue; + } + + const adjustedActivationPoint = getBottomClippedActivationPoint(element, viewport); + if (adjustedActivationPoint) { + element.metadata.activationPoint = adjustedActivationPoint; + } + } +} + +function inferScrollableContainers(elements: RuntimeSnapshotElementRecord[]): void { + for (const element of elements) { + const { publicElement, metadata } = element; + if ( + !isContainerRole(publicElement.role) || + 
!isVisible(publicElement.frame) || + !isLargeEnoughInferredScrollContainer(publicElement.role, publicElement.frame) + ) { + continue; + } + if (publicElement.actions.includes('swipeWithin')) { + continue; + } + + const hasOverflowingDescendant = elements.some((candidate) => { + if (candidate === element) { + return false; + } + return ( + isDescendantPath(metadata.path, candidate.metadata.path) && + frameOverflowsContainer(candidate.publicElement.frame, publicElement.frame) + ); + }); + + const sheetGrabber = + publicElement.role === 'application' || publicElement.role === 'window' + ? findSheetGrabberDescendant(element, elements) + : null; + + if (sheetGrabber) { + publicElement.actions.push('swipeWithin'); + metadata.swipeFrame = createSheetSwipeFrame( + publicElement.frame, + sheetGrabber.publicElement.frame, + ); + continue; + } + + if ( + publicElement.role !== 'application' && + publicElement.role !== 'window' && + hasOverflowingDescendant + ) { + publicElement.actions.push('swipeWithin'); + } + } +} + +function flattenHierarchy(roots: AccessibilityNode[]): NormalizedNodeInput[] { + const flattened: NormalizedNodeInput[] = []; + + function visit(node: AccessibilityNode, path: string, depth: number): void { + flattened.push({ node, path, depth }); + readChildren(node).forEach((child, index) => visit(child, `${path}.${index}`, depth + 1)); + } + + roots.forEach((root, index) => visit(root, String(index), 0)); + return flattened; +} + +function toActionHints(elements: readonly RuntimeElementV1[]): RuntimeActionHintV1[] { + return elements.flatMap((element) => + element.actions.map((action) => ({ + action, + elementRef: element.ref, + ...(element.label ? 
{ label: element.label } : {}), + })), + ); +} + +function createScreenHash(params: { + elements: readonly RuntimeElementV1[]; + actions: readonly RuntimeActionHintV1[]; +}): string { + return hashString( + JSON.stringify({ + protocol: RUNTIME_SNAPSHOT_PROTOCOL, + elements: params.elements, + actions: params.actions, + }), + ); +} + +export function extractAccessibilityHierarchy(responseText: string): AccessibilityNode[] { + let parsed: unknown; + try { + parsed = JSON.parse(responseText) as unknown; + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + throw new RuntimeSnapshotParseError(`AXe describe-ui returned invalid JSON: ${message}`); + } + + if (Array.isArray(parsed)) { + return parsed as AccessibilityNode[]; + } + + if (isRecord(parsed) && Array.isArray(parsed.elements)) { + return parsed.elements as AccessibilityNode[]; + } + + throw new RuntimeSnapshotParseError( + 'AXe describe-ui did not return an accessibility element array.', + ); +} + +export function createRuntimeSnapshotRecord(params: { + simulatorId: string; + uiHierarchy: AccessibilityNode[]; + nowMs?: number; + seq?: number; +}): RuntimeSnapshotRecord { + const capturedAtMs = params.nowMs ?? Date.now(); + const expiresAtMs = capturedAtMs + RUNTIME_SNAPSHOT_TTL_MS; + const elements = flattenHierarchy(params.uiHierarchy).map((input, index) => + normalizeNode(input, index), + ); + applyViewportVisibility(elements); + inferScrollableContainers(elements); + const publicElements = elements.map((element) => element.publicElement); + const actions = toActionHints(publicElements); + const screenHash = createScreenHash({ elements: publicElements, actions }); + const seq = params.seq ?? 
0; + const elementsByRef = new Map(elements.map((element) => [element.publicElement.ref, element])); + const payload: RuntimeSnapshotV1 = { + type: 'runtime-snapshot', + protocol: RUNTIME_SNAPSHOT_PROTOCOL, + simulatorId: params.simulatorId, + screenHash, + seq, + capturedAtMs, + expiresAtMs, + elements: publicElements, + actions, + }; + + return { + simulatorId: params.simulatorId, + screenHash, + seq, + capturedAtMs, + expiresAtMs, + payload, + elements, + elementsByRef, + }; +} + +export function parseRuntimeSnapshotResponse(params: { + simulatorId: string; + responseText: string; + nowMs?: number; +}): RuntimeSnapshotRecord { + return createRuntimeSnapshotRecord({ + simulatorId: params.simulatorId, + uiHierarchy: extractAccessibilityHierarchy(params.responseText), + nowMs: params.nowMs, + }); +} + +export function getPrimaryRuntimeElement( + snapshot: RuntimeSnapshotV1, + action: RuntimeActionNameV1 = 'tap', +): RuntimeElementV1 | null { + return ( + snapshot.elements.find((element) => element.actions.includes(action)) ?? + snapshot.elements[0] ?? 
+ null + ); +} + +export function getRuntimeElementCenter(element: RuntimeSnapshotElementRecord): Point { + const { frame } = element.publicElement; + return { + x: Math.round(frame.x + frame.width / 2), + y: Math.round(frame.y + frame.height / 2), + }; +} + +function getDefaultRuntimeElementActivationPoint(element: RuntimeSnapshotElementRecord): Point { + const { frame, role } = element.publicElement; + if (role === 'switch' && frame.width > 120) { + return { + x: Math.round(frame.x + frame.width - 52), + y: Math.round(frame.y + frame.height / 2), + }; + } + + return getRuntimeElementCenter(element); +} + +function getBottomClippedActivationPoint( + element: RuntimeSnapshotElementRecord, + viewport: Frame, +): Point | null { + if (!hasPointAction(element.publicElement.actions)) { + return null; + } + + const defaultPoint = getDefaultRuntimeElementActivationPoint(element); + const bottomClippedZoneStart = viewport.y + viewport.height * 0.93; + if (defaultPoint.y < bottomClippedZoneStart) { + return null; + } + + const { frame } = element.publicElement; + const verticalOffset = Math.min(Math.max(frame.height * 0.1, 8), frame.height / 2); + const adjustedPoint = { + x: defaultPoint.x, + y: Math.round(frame.y + verticalOffset), + }; + + if (!pointInsideFrame(adjustedPoint, frame) || !pointInsideFrame(adjustedPoint, viewport)) { + return null; + } + + return adjustedPoint; +} + +export function getRuntimeElementActivationPoint(element: RuntimeSnapshotElementRecord): Point { + return element.metadata.activationPoint ?? 
getDefaultRuntimeElementActivationPoint(element); +} + +export type RuntimeSwipeDirection = 'up' | 'down' | 'left' | 'right'; + +export type RuntimeSwipePointResolution = + | { ok: true; from: Point; to: Point } + | { ok: false; message: string }; + +function isDegenerateSwipe(from: Point, to: Point): boolean { + return from.x === to.x && from.y === to.y; +} + +function getFrameCenter(frame: Frame): Point { + return { + x: Math.round(frame.x + frame.width / 2), + y: Math.round(frame.y + frame.height / 2), + }; +} + +function getRuntimeSwipeCenter( + element: RuntimeSnapshotElementRecord, + direction: RuntimeSwipeDirection, + swipeFrame: Frame, +): Point { + const center = getFrameCenter(swipeFrame); + const { role } = element.publicElement; + if ( + (role === 'application' || role === 'window') && + (direction === 'left' || direction === 'right') + ) { + return { x: center.x, y: Math.round(swipeFrame.y + swipeFrame.height * 0.6) }; + } + return center; +} + +export function getRuntimeElementSwipePoints( + element: RuntimeSnapshotElementRecord, + direction: RuntimeSwipeDirection, +): RuntimeSwipePointResolution { + const frame = element.metadata.swipeFrame ?? 
element.publicElement.frame; + if (frame.width < 2 || frame.height < 2) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' is too small for a reliable swipe.`, + }; + } + + const center = getRuntimeSwipeCenter(element, direction, frame); + const horizontalInset = Math.max(1, Math.min(Math.max(frame.width * 0.15, 24), frame.width / 3)); + const verticalInset = Math.max(1, Math.min(Math.max(frame.height * 0.15, 24), frame.height / 3)); + const left = Math.round(frame.x + horizontalInset); + const right = Math.round(frame.x + frame.width - horizontalInset); + const top = Math.round(frame.y + verticalInset); + const bottom = Math.round(frame.y + frame.height - verticalInset); + + const points = ((): { from: Point; to: Point } => { + switch (direction) { + case 'up': + return { from: { x: center.x, y: bottom }, to: { x: center.x, y: top } }; + case 'down': + return { from: { x: center.x, y: top }, to: { x: center.x, y: bottom } }; + case 'left': + return { from: { x: right, y: center.y }, to: { x: left, y: center.y } }; + case 'right': + return { from: { x: left, y: center.y }, to: { x: right, y: center.y } }; + } + })(); + + if (isDegenerateSwipe(points.from, points.to)) { + return { + ok: false, + message: `Element ref '${element.publicElement.ref}' does not provide non-degenerate ${direction} swipe points.`, + }; + } + + return { ok: true, ...points }; +} diff --git a/src/mcp/tools/ui-automation/shared/semantic-tap.ts b/src/mcp/tools/ui-automation/shared/semantic-tap.ts new file mode 100644 index 000000000..0ea5c2b6e --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/semantic-tap.ts @@ -0,0 +1,138 @@ +import type { CommandExecutor } from '../../../../utils/execution/index.ts'; +import { executeAxeCommand } from './axe-command.ts'; +import type { AxeHelpers } from './axe-command.ts'; +import { getRuntimeElementActivationPoint } from './runtime-snapshot.ts'; +import type { RuntimeSnapshotElementRecord } from 
'../../../../types/ui-snapshot.ts'; + +export interface SemanticTapCommand { + selectorArgs: string[] | null; + coordinateArgs: string[]; + primaryArgs: string[]; + targetDescription: string; + usedSelector: boolean; +} + +function axeElementTypeFor(element: RuntimeSnapshotElementRecord): string | null { + switch (element.publicElement.role) { + case 'button': + return 'Button'; + case 'cell': + return 'Cell'; + case 'keyboard-key': + return 'Key'; + case 'switch': + return 'Switch'; + case 'tab': + return 'Tab'; + case 'text-field': + return 'TextField'; + default: + return null; + } +} + +export function isRecoverableAxeSelectorError(error: unknown): boolean { + const messageParts = error instanceof Error ? [error.message] : [String(error)]; + if (typeof error === 'object' && error !== null && 'axeOutput' in error) { + const { axeOutput } = error as { axeOutput?: unknown }; + if (typeof axeOutput === 'string') { + messageParts.push(axeOutput); + } + } + + const message = messageParts.join('\n'); + return ( + /multiple(?:\s+\(?\d+\)?)?\s+accessibility\s+elements\s+matched/i.test(message) || + /no\s+accessibility\s+element\s+matched/i.test(message) + ); +} + +function hasDuplicateSelectorMatch(params: { + element: RuntimeSnapshotElementRecord; + elements: readonly RuntimeSnapshotElementRecord[]; + selector: 'identifier' | 'label' | 'value'; + value: string; +}): boolean { + const targetType = axeElementTypeFor(params.element); + const matches = params.elements.filter((candidate) => { + if (targetType !== null && axeElementTypeFor(candidate) !== targetType) { // only narrow by type when createSemanticTapCommand will send --element-type; an untyped selector is assumed to match elements of every type (NOTE(review): confirm against AXe selector semantics) + return false; + } + return candidate.publicElement[params.selector] === params.value; + }); + + return matches.length > 1; +} + +export function createSemanticTapCommand( + element: RuntimeSnapshotElementRecord, + elementRef: string, + extraArgs: readonly string[] = [], + elements: readonly RuntimeSnapshotElementRecord[] = [element], +): SemanticTapCommand { + const { identifier, label, value } = element.publicElement; + const
activationPoint = getRuntimeElementActivationPoint(element); + const elementType = axeElementTypeFor(element); + const elementTypeArgs = elementType ? ['--element-type', elementType] : []; + const coordinateArgs = + element.publicElement.role === 'switch' + ? [ + 'touch', + '-x', + String(activationPoint.x), + '-y', + String(activationPoint.y), + '--down', + '--up', + ] + : ['tap', '-x', String(activationPoint.x), '-y', String(activationPoint.y), ...extraArgs]; + + const selectorArgs = (() => { + if (element.publicElement.role === 'switch') return null; + if ( + identifier && + !hasDuplicateSelectorMatch({ element, elements, selector: 'identifier', value: identifier }) + ) { + return ['tap', '--id', identifier, ...elementTypeArgs, ...extraArgs]; + } + if ( + label && + !hasDuplicateSelectorMatch({ element, elements, selector: 'label', value: label }) + ) { + return ['tap', '--label', label, ...elementTypeArgs, ...extraArgs]; + } + if (value && !hasDuplicateSelectorMatch({ element, elements, selector: 'value', value })) { + return ['tap', '--value', value, ...elementTypeArgs, ...extraArgs]; + } + return null; + })(); + + return { + selectorArgs, + coordinateArgs, + primaryArgs: selectorArgs ?? coordinateArgs, + targetDescription: selectorArgs + ? 
`elementRef ${elementRef} semantic selector` + : `elementRef ${elementRef} activation point (${activationPoint.x}, ${activationPoint.y})`, + usedSelector: selectorArgs !== null, + }; +} + +export async function executeSemanticTapWithAmbiguityFallback(params: { + command: SemanticTapCommand; + simulatorId: string; + executor: CommandExecutor; + axeHelpers: AxeHelpers; +}): Promise { + const { command, simulatorId, executor, axeHelpers } = params; + + try { + await executeAxeCommand(command.primaryArgs, simulatorId, 'tap', executor, axeHelpers); + } catch (error) { + if (!command.selectorArgs || !isRecoverableAxeSelectorError(error)) { + throw error; + } + + await executeAxeCommand(command.coordinateArgs, simulatorId, 'tap', executor, axeHelpers); + } +} diff --git a/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts b/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts index cd0fa28c6..efdb5f15c 100644 --- a/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts +++ b/src/mcp/tools/ui-automation/shared/snapshot-ui-state.ts @@ -1,21 +1,144 @@ -const SNAPSHOT_UI_WARNING_TIMEOUT_MS = 60000; // 60 seconds +import type { + RuntimeActionNameV1, + RuntimeElementResolution, + RuntimeSnapshotLookup, + RuntimeSnapshotRecord, + UiAutomationRecoverableError, +} from '../../../../types/ui-snapshot.ts'; -const snapshotUiTimestamps = new Map(); +const runtimeSnapshots = new Map(); +const runtimeSnapshotSeqs = new Map(); -export function recordSnapshotUiCall(simulatorId: string): void { - snapshotUiTimestamps.set(simulatorId, Date.now()); +function snapshotAgeMs(snapshot: RuntimeSnapshotRecord, nowMs: number): number { + return Math.max(0, nowMs - snapshot.capturedAtMs); +} + +function snapshotMissingError(): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_MISSING', + message: 'No runtime UI snapshot is available for this simulator.', + recoveryHint: + 'Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot.', + }; +} + +function 
snapshotExpiredError( + snapshot: RuntimeSnapshotRecord, + nowMs: number, +): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_EXPIRED', + message: 'The runtime UI snapshot for this simulator has expired.', + recoveryHint: 'Run snapshot_ui again and retry with a current elementRef.', + snapshotAgeMs: snapshotAgeMs(snapshot, nowMs), + }; +} + +export function recordRuntimeSnapshot(snapshot: RuntimeSnapshotRecord): RuntimeSnapshotRecord { + const nextSeq = (runtimeSnapshotSeqs.get(snapshot.simulatorId) ?? 0) + 1; + runtimeSnapshotSeqs.set(snapshot.simulatorId, nextSeq); + snapshot.seq = nextSeq; + snapshot.payload.seq = nextSeq; + runtimeSnapshots.set(snapshot.simulatorId, snapshot); + return snapshot; +} + +export function clearRuntimeSnapshot(simulatorId: string): void { + runtimeSnapshots.delete(simulatorId); +} + +export function __resetRuntimeSnapshotStoreForTests(): void { + runtimeSnapshots.clear(); + runtimeSnapshotSeqs.clear(); +} + +export function getRuntimeSnapshotLookup( + simulatorId: string, + nowMs = Date.now(), +): RuntimeSnapshotLookup { + const snapshot = runtimeSnapshots.get(simulatorId) ?? null; + if (!snapshot) { + return { status: 'missing', snapshot: null }; + } + + const ageMs = snapshotAgeMs(snapshot, nowMs); + if (nowMs > snapshot.expiresAtMs) { + runtimeSnapshots.delete(simulatorId); + return { status: 'expired', snapshot: null, snapshotAgeMs: ageMs }; + } + + return { status: 'available', snapshot, snapshotAgeMs: ageMs }; +} + +export function getRuntimeSnapshot( + simulatorId: string, + nowMs = Date.now(), +): RuntimeSnapshotRecord | null { + return getRuntimeSnapshotLookup(simulatorId, nowMs).snapshot; +} + +export function resolveElementRef( + simulatorId: string, + elementRef: string, + requiredAction: RuntimeActionNameV1, + nowMs = Date.now(), +): RuntimeElementResolution { + const snapshot = runtimeSnapshots.get(simulatorId) ?? 
null; + if (!snapshot) { + return { ok: false, error: snapshotMissingError() }; + } + + const ageMs = snapshotAgeMs(snapshot, nowMs); + if (nowMs > snapshot.expiresAtMs) { + runtimeSnapshots.delete(simulatorId); + return { ok: false, error: snapshotExpiredError(snapshot, nowMs) }; + } + + const element = snapshot.elementsByRef.get(elementRef); + if (!element) { + return { + ok: false, + error: { + code: 'ELEMENT_REF_NOT_FOUND', + message: `Element ref '${elementRef}' was not found in the current runtime UI snapshot.`, + recoveryHint: + 'Run snapshot_ui again and retry with an elementRef from the latest snapshot.', + elementRef, + snapshotAgeMs: ageMs, + }, + }; + } + + if (!element.publicElement.actions.includes(requiredAction)) { + return { + ok: false, + error: { + code: 'TARGET_NOT_ACTIONABLE', + message: `Element ref '${elementRef}' does not support '${requiredAction}'.`, + recoveryHint: + 'Choose an elementRef that lists the required action, or refresh with snapshot_ui.', + elementRef, + candidates: snapshot.payload.elements.filter((candidate) => + candidate.actions.includes(requiredAction), + ), + snapshotAgeMs: ageMs, + }, + }; + } + + return { ok: true, snapshot, element, snapshotAgeMs: ageMs }; } export function getSnapshotUiWarning(simulatorId: string): string | null { - const timestamp = snapshotUiTimestamps.get(simulatorId); - if (!timestamp) { - return 'Warning: snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots.'; + const lookup = getRuntimeSnapshotLookup(simulatorId); + + if (lookup.status === 'missing') { + return 'Warning: snapshot_ui has not been called yet. 
Consider using snapshot_ui to capture semantic element references before interacting with the UI.'; } - const timeSinceDescribe = Date.now() - timestamp; - if (timeSinceDescribe > SNAPSHOT_UI_WARNING_TIMEOUT_MS) { - const secondsAgo = Math.round(timeSinceDescribe / 1000); - return `Warning: snapshot_ui was last called ${secondsAgo} seconds ago. Consider refreshing UI coordinates with snapshot_ui instead of using potentially stale coordinates.`; + if (lookup.status === 'expired') { + const secondsAgo = Math.round((lookup.snapshotAgeMs ?? 0) / 1000); + return `Warning: snapshot_ui was last called ${secondsAgo} seconds ago. Refresh UI element references with snapshot_ui before interacting with the UI.`; } return null; diff --git a/src/mcp/tools/ui-automation/shared/wait-predicate.ts b/src/mcp/tools/ui-automation/shared/wait-predicate.ts new file mode 100644 index 000000000..ffa838975 --- /dev/null +++ b/src/mcp/tools/ui-automation/shared/wait-predicate.ts @@ -0,0 +1,361 @@ +import type { + RuntimeElementRoleV1, + RuntimeElementV1, + RuntimeSnapshotRecord, + UiAutomationRecoverableError, +} from '../../../../types/ui-snapshot.ts'; +import { getRuntimeSnapshotLookup } from './snapshot-ui-state.ts'; + +export const waitPredicates = [ + 'exists', + 'gone', + 'enabled', + 'focused', + 'textContains', + 'settled', +] as const; + +export type WaitPredicate = (typeof waitPredicates)[number]; +export type SelectorPredicate = Exclude; + +export interface WaitSelector { + elementRef?: string; + identifier?: string; + label?: string; + role?: RuntimeElementRoleV1; + value?: string; +} + +export interface ResolvedWaitSelector { + sourceElementRef?: string; + identifier?: string; + label?: string; + role?: RuntimeElementRoleV1; + value?: string; +} + +export interface WaitEvaluation { + matched: boolean; + candidates?: RuntimeElementV1[]; + uiError?: UiAutomationRecoverableError; +} + +export interface SettledTracker { + signature: string | null; + stableSinceMs: number | null; +} 
+ +function snapshotMissingError(): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_MISSING', + message: 'No runtime UI snapshot is available for this simulator.', + recoveryHint: + 'Run snapshot_ui for this simulator, then retry wait_for_ui with an elementRef from that snapshot.', + }; +} + +function snapshotExpiredError(snapshotAgeMs: number): UiAutomationRecoverableError { + return { + code: 'SNAPSHOT_EXPIRED', + message: 'The runtime UI snapshot for this simulator has expired.', + recoveryHint: 'Run snapshot_ui again and retry wait_for_ui with a current elementRef.', + snapshotAgeMs, + }; +} + +function targetNotFoundError(elementRef: string): UiAutomationRecoverableError { + return { + code: 'TARGET_NOT_FOUND', + message: `Element ref '${elementRef}' cannot be converted into a stable wait selector.`, + recoveryHint: + 'Use an element with an identifier, label, or value, or refresh with snapshot_ui and choose a more stable target.', + elementRef, + }; +} + +function normalizedText(value: string | undefined): string { + return value?.replace(/\s+/g, ' ').trim() ?? 
''; +} + +function elementTextContains(element: RuntimeElementV1, text: string): boolean { + const needle = normalizedText(text).toLowerCase(); + if (needle.length === 0) { + return false; + } + return ( + normalizedText(element.value).toLowerCase().includes(needle) || + normalizedText(element.label).toLowerCase().includes(needle) + ); +} + +function matchingElementText(element: RuntimeElementV1, text: string): string | null { + const needle = normalizedText(text).toLowerCase(); + if (needle.length === 0) { + return null; + } + + const value = normalizedText(element.value).toLowerCase(); + if (value.includes(needle)) { + return value; + } + + const label = normalizedText(element.label).toLowerCase(); + if (label.includes(needle)) { + return label; + } + + return null; +} + +function candidatesShareMatchingText(candidates: RuntimeElementV1[], text: string): boolean { + const [first, ...remaining] = candidates.map((candidate) => matchingElementText(candidate, text)); + return first !== null && remaining.every((candidateText) => candidateText === first); +} + +function elementSignatures(snapshot: RuntimeSnapshotRecord): string { + return snapshot.elements.map((element) => element.metadata.signature).join('|'); +} + +export function hasSelectorFields(selector: WaitSelector): boolean { + return Boolean( + selector.elementRef || selector.identifier || selector.label || selector.role || selector.value, + ); +} + +export function selectorFromParams(selector: WaitSelector): ResolvedWaitSelector | null { + const resolved: ResolvedWaitSelector = { + ...(selector.identifier ? { identifier: selector.identifier } : {}), + ...(selector.label ? { label: selector.label } : {}), + ...(selector.role ? { role: selector.role } : {}), + ...(selector.value ? { value: selector.value } : {}), + }; + + return hasSelectorFields(resolved) ? 
resolved : null; +} + +export function resolveElementSelector( + simulatorId: string, + elementRef: string, + nowMs: number, +): + | { ok: true; selector: ResolvedWaitSelector } + | { ok: false; error: UiAutomationRecoverableError } { + const lookup = getRuntimeSnapshotLookup(simulatorId, nowMs); + if (lookup.status === 'missing') { + return { ok: false, error: snapshotMissingError() }; + } + + if (lookup.status === 'expired') { + return { ok: false, error: snapshotExpiredError(lookup.snapshotAgeMs ?? 0) }; + } + + const snapshot = lookup.snapshot; + const element = snapshot?.elementsByRef.get(elementRef); + if (!snapshot || !element) { + return { + ok: false, + error: { + code: 'ELEMENT_REF_NOT_FOUND', + message: `Element ref '${elementRef}' was not found in the current runtime UI snapshot.`, + recoveryHint: + 'Run snapshot_ui again and retry wait_for_ui with an elementRef from the latest snapshot.', + elementRef, + snapshotAgeMs: lookup.snapshotAgeMs ?? 0, + }, + }; + } + + const publicElement = element.publicElement; + if (publicElement.identifier) { + return { + ok: true, + selector: { sourceElementRef: elementRef, identifier: publicElement.identifier }, + }; + } + + if (publicElement.label && publicElement.role) { + return { + ok: true, + selector: { + sourceElementRef: elementRef, + label: publicElement.label, + role: publicElement.role, + }, + }; + } + + if (publicElement.value && publicElement.role) { + return { + ok: true, + selector: { + sourceElementRef: elementRef, + value: publicElement.value, + role: publicElement.role, + }, + }; + } + + return { ok: false, error: targetNotFoundError(elementRef) }; +} + +function matchSelector( + snapshot: RuntimeSnapshotRecord, + selector: ResolvedWaitSelector, +): RuntimeElementV1[] { + return snapshot.elements + .map((element) => element.publicElement) + .filter((element) => { + if (selector.identifier !== undefined && element.identifier !== selector.identifier) + return false; + if (selector.label !== undefined && 
element.label !== selector.label) return false; + if (selector.role !== undefined && element.role !== selector.role) return false; + if (selector.value !== undefined && element.value !== selector.value) return false; + return true; + }); +} + +function ambiguousSelectorError( + selector: ResolvedWaitSelector, + candidates: RuntimeElementV1[], +): UiAutomationRecoverableError { + return { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: + 'Provide a more specific selector, or refresh with snapshot_ui and choose a stable elementRef.', + ...(selector.sourceElementRef ? { elementRef: selector.sourceElementRef } : {}), + candidates, + }; +} + +function focusedStateUnavailableError( + selector: ResolvedWaitSelector, + candidate: RuntimeElementV1, +): UiAutomationRecoverableError { + return { + code: 'TARGET_NOT_ACTIONABLE', + message: 'The matched runtime UI element does not expose focus state.', + recoveryHint: + 'Use exists, enabled, textContains, or a screenshot-based check for this element instead of focused.', + ...(selector.sourceElementRef ? 
{ elementRef: selector.sourceElementRef } : {}), + candidates: [candidate], + }; +} + +export function evaluateTextContainsPredicate(params: { + snapshot: RuntimeSnapshotRecord; + text: string; +}): WaitEvaluation { + const candidates = params.snapshot.elements + .map((element) => element.publicElement) + .filter((element) => elementTextContains(element, params.text)); + + if (candidates.length > 1) { + if (candidatesShareMatchingText(candidates, params.text)) { + return { matched: true, candidates }; + } + return { + matched: false, + candidates, + uiError: ambiguousSelectorError({}, candidates), + }; + } + + return { matched: candidates.length === 1, candidates }; +} + +export function evaluateElementPredicate(params: { + predicate: SelectorPredicate; + selector: ResolvedWaitSelector; + snapshot: RuntimeSnapshotRecord; + text?: string; +}): WaitEvaluation { + const { predicate, selector, snapshot, text } = params; + const candidates = matchSelector(snapshot, selector); + + if (predicate === 'exists') { + return { matched: candidates.length > 0, candidates }; + } + + if (predicate === 'gone') { + return { matched: candidates.length === 0, candidates }; + } + + if (predicate === 'textContains') { + const textMatches = candidates.filter((candidate) => + elementTextContains(candidate, text ?? ''), + ); + if (textMatches.length > 1) { + if (candidatesShareMatchingText(textMatches, text ?? 
'')) { + return { matched: true, candidates: textMatches }; + } + return { + matched: false, + candidates: textMatches, + uiError: ambiguousSelectorError(selector, textMatches), + }; + } + return { matched: textMatches.length === 1, candidates: textMatches }; + } + + if (candidates.length > 1) { + return { matched: false, candidates, uiError: ambiguousSelectorError(selector, candidates) }; + } + + const match = candidates[0]; + if (!match) { + return { matched: false, candidates }; + } + + switch (predicate) { + case 'enabled': + return { matched: match.state?.enabled === true, candidates }; + case 'focused': + if (match.state?.focused === undefined) { + return { + matched: false, + candidates, + uiError: focusedStateUnavailableError(selector, match), + }; + } + return { matched: match.state.focused === true, candidates }; + } +} + +export function evaluateSettledPredicate(params: { + snapshot: RuntimeSnapshotRecord; + nowMs: number; + settledDurationMs: number; + tracker: SettledTracker; +}): boolean { + const signature = elementSignatures(params.snapshot); + if (params.tracker.signature !== signature) { + params.tracker.signature = signature; + params.tracker.stableSinceMs = params.nowMs; + return params.settledDurationMs === 0; + } + + const stableSinceMs = params.tracker.stableSinceMs ?? params.nowMs; + params.tracker.stableSinceMs = stableSinceMs; + return params.nowMs - stableSinceMs >= params.settledDurationMs; +} + +export function createWaitTimeoutError(params: { + predicate: WaitPredicate; + timeoutMs: number; + selector?: ResolvedWaitSelector; + candidates?: RuntimeElementV1[]; +}): UiAutomationRecoverableError { + const recoveryHint = params.selector + ? 'Selector fields match exact values. Use textContains for partial visible text, inspect the latest runtime snapshot, or adjust the wait selector.' 
+ : 'Inspect the latest runtime snapshot, adjust the wait selector, or retry later.'; + + return { + code: 'WAIT_TIMEOUT', + message: `Timed out after ${params.timeoutMs}ms waiting for UI predicate '${params.predicate}'.`, + recoveryHint, + timeoutMs: params.timeoutMs, + ...(params.selector?.sourceElementRef ? { elementRef: params.selector.sourceElementRef } : {}), + ...(params.candidates !== undefined ? { candidates: params.candidates } : {}), + }; +} diff --git a/src/mcp/tools/ui-automation/snapshot_ui.ts b/src/mcp/tools/ui-automation/snapshot_ui.ts index 6a5558dcc..9a197a4ee 100644 --- a/src/mcp/tools/ui-automation/snapshot_ui.ts +++ b/src/mcp/tools/ui-automation/snapshot_ui.ts @@ -11,13 +11,11 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { recordSnapshotUiCall } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; -import type { - AccessibilityNode, - CaptureResultDomainResult, -} from '../../../types/domain-results.ts'; +import type { NextStep } from '../../../types/common.ts'; +import type { CaptureResultDomainResult } from '../../../types/domain-results.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import { createCaptureFailureResult, @@ -25,9 +23,18 @@ import { mapAxeCommandError, setCaptureStructuredOutput, } from './shared/domain-result.ts'; +import { + parseRuntimeSnapshotResponse, + RuntimeSnapshotParseError, +} from './shared/runtime-snapshot.ts'; const snapshotUiSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + sinceScreenHash: z + .string() + .min(1, 'sinceScreenHash must not be empty') + .optional() + .describe('Return an unchanged response when the current screen hash matches this value'), }); type 
SnapshotUiParams = z.infer; @@ -35,24 +42,72 @@ type SnapshotUiResult = CaptureResultDomainResult; const LOG_PREFIX = '[AXe]'; -function parseUiHierarchy(responseText: string): AccessibilityNode[] | undefined { - try { - const parsed = JSON.parse(responseText) as unknown; - if (Array.isArray(parsed)) { - return parsed as AccessibilityNode[]; - } - if ( - parsed && - typeof parsed === 'object' && - 'elements' in parsed && - Array.isArray((parsed as { elements?: unknown }).elements) - ) { - return (parsed as { elements: AccessibilityNode[] }).elements; - } - } catch { - // ignore +const HIDDEN_TAP_NEXT_STEP_LABELS = new Set(['sheet grabber']); + +const LOW_PRIORITY_TAP_NEXT_STEP_LABELS = new Set([ + 'close', + 'clear search', + 'remove', + 'delete', + 'clear', + 'c', + 'ac', + '±', + '%', + '÷', + '×', + '-', + '+', + '=', +]); + +function compactTapNextStepText(value: string | undefined): string { + return (value ?? '').replace(/\s+/g, ' ').trim(); +} + +function isHiddenTapNextStepElement(label: string | undefined): boolean { + return HIDDEN_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); +} + +function isLowPriorityTapNextStepElement(label: string | undefined): boolean { + return LOW_PRIORITY_TAP_NEXT_STEP_LABELS.has(compactTapNextStepText(label).toLowerCase()); +} + +function isContentRichTapNextStepElement(element: { + label?: string; + identifier?: string; +}): boolean { + const label = compactTapNextStepText(element.label); + const identifier = compactTapNextStepText(element.identifier); + return label.includes(',') || label.length >= 24 || /card$/i.test(identifier); +} + +function isAlreadySelectedTapNextStepElement(element: { + state?: { selected?: boolean }; + value?: string; +}): boolean { + return ( + element.state?.selected === true || + compactTapNextStepText(element.value).toLowerCase() === 'selected' + ); +} + +function getTapNextStepElementPriority(element: { + label?: string; + identifier?: string; + state?: { selected?: 
boolean }; + value?: string; +}): number { + if (isLowPriorityTapNextStepElement(element.label)) { + return 90; } - return undefined; + if (isAlreadySelectedTapNextStepElement(element)) { + return 70; + } + if (isContentRichTapNextStepElement(element)) { + return 0; + } + return 20; } export function createSnapshotUiExecutor( @@ -71,6 +126,7 @@ export function createSnapshotUiExecutor( toolName, }); if (guard.blockedMessage) { + clearRuntimeSnapshot(simulatorId); return createCaptureFailureResult(simulatorId, guard.blockedMessage); } @@ -85,20 +141,43 @@ export function createSnapshotUiExecutor( axeHelpers, ); - recordSnapshotUiCall(simulatorId); + const snapshot = parseRuntimeSnapshotResponse({ simulatorId, responseText }); + recordRuntimeSnapshot(snapshot); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - const uiHierarchy = parseUiHierarchy(responseText); + if (params.sinceScreenHash === snapshot.screenHash) { + return createCaptureSuccessResult(simulatorId, { + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId, + screenHash: snapshot.screenHash, + seq: snapshot.seq, + }, + warnings: [guard.warningText], + }); + } + return createCaptureSuccessResult(simulatorId, { - capture: uiHierarchy - ? 
{ - type: 'ui-hierarchy', - uiHierarchy, - } - : undefined, + capture: snapshot.payload, warnings: [guard.warningText], }); } catch (error) { + clearRuntimeSnapshot(simulatorId); + + if (error instanceof RuntimeSnapshotParseError) { + const message = 'Failed to parse runtime UI snapshot.'; + log('error', `${LOG_PREFIX}/${toolName}: Failed - ${message}`); + return createCaptureFailureResult(simulatorId, message, { + details: [error.message], + uiError: { + code: 'SNAPSHOT_PARSE_FAILED', + message, + recoveryHint: 'Run snapshot_ui again after the app is fully launched and responsive.', + }, + }); + } + const failure = mapAxeCommandError(error, { axeFailureMessage: () => 'Failed to get accessibility hierarchy.', }); @@ -122,11 +201,56 @@ export async function snapshot_uiLogic( setCaptureStructuredOutput(ctx, result); - ctx.nextStepParams = { - snapshot_ui: { simulatorId: params.simulatorId }, - tap: { simulatorId: params.simulatorId, x: 0, y: 0 }, - screenshot: { simulatorId: params.simulatorId }, - }; + const runtimeSnapshot = + result.capture && 'type' in result.capture && result.capture.type === 'runtime-snapshot' + ? result.capture + : null; + const tapElement = runtimeSnapshot + ? (runtimeSnapshot.elements + .map((element, index) => ({ element, index })) + .filter( + ({ element }) => + element.actions.includes('tap') && + !element.actions.includes('typeText') && + !isHiddenTapNextStepElement(element.label), + ) + .sort((left, right) => { + const priorityDelta = + getTapNextStepElementPriority(left.element) - + getTapNextStepElementPriority(right.element); + return priorityDelta === 0 ? left.index - right.index : priorityDelta; + })[0]?.element ?? 
null) + : null; + + if (!result.didError) { + const nextSteps: NextStep[] = [ + { + label: 'Refresh after layout changes', + tool: 'snapshot_ui', + params: { simulatorId: params.simulatorId }, + }, + { + label: 'Wait for UI to settle', + tool: 'wait_for_ui', + params: { simulatorId: params.simulatorId, predicate: 'settled' }, + }, + ...(tapElement + ? [ + { + label: 'Tap an elementRef', + tool: 'tap', + params: { simulatorId: params.simulatorId, elementRef: tapElement.ref }, + }, + ] + : []), + { + label: 'Take screenshot for verification', + tool: 'screenshot', + params: { simulatorId: params.simulatorId }, + }, + ]; + ctx.nextSteps = nextSteps; + } } const publicSchemaObject = z.strictObject( diff --git a/src/mcp/tools/ui-automation/swipe.ts b/src/mcp/tools/ui-automation/swipe.ts index 145f32a4f..f0167de27 100644 --- a/src/mcp/tools/ui-automation/swipe.ts +++ b/src/mcp/tools/ui-automation/swipe.ts @@ -1,7 +1,7 @@ /** * UI Testing Plugin: Swipe * - * Swipe from one coordinate to another on iOS simulator with customizable duration and delta. + * Swipes within a semantic UI element from the runtime snapshot store. 
*/ import * as z from 'zod'; @@ -17,7 +17,8 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementSwipePoints } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; @@ -26,30 +27,32 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const swipeSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x1: z.number().int({ message: 'Start X coordinate' }), - y1: z.number().int({ message: 'Start Y coordinate' }), - x2: z.number().int({ message: 'End X coordinate' }), - y2: z.number().int({ message: 'End Y coordinate' }), + withinElementRef: z.string().min(1, { message: 'withinElementRef must be non-empty' }), + direction: z.enum(['up', 'down', 'left', 'right']).describe('up|down|left|right'), duration: z .number() - .min(0, { message: 'Duration must be non-negative' }) + .positive({ message: 'Duration must be greater than 0 seconds' }) .optional() .describe('seconds'), - delta: z.number().min(0, { message: 'Delta must be non-negative' }).optional(), + distance: z.number().positive({ message: 'Distance must be greater than 0' }).optional(), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) .optional() .describe('seconds'), postDelay: z .number() .min(0, { message: 'Post-delay 
must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) .optional() .describe('seconds'), }); @@ -68,40 +71,57 @@ export function createSwipeExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'swipe'; - const { simulatorId, x1, y1, x2, y2, duration, delta, preDelay, postDelay } = params; - const baseAction = { type: 'swipe' as const }; - const fullAction = { + const { simulatorId, withinElementRef, direction, duration, distance, preDelay, postDelay } = + params; + const action = { type: 'swipe' as const, - from: { x: x1, y: y1 }, - to: { x: x2, y: y2 }, + withinElementRef, + direction, ...(duration !== undefined ? { durationSeconds: duration } : {}), }; + const resolution = resolveElementRef(simulatorId, withinElementRef, 'swipeWithin'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + + const points = getRuntimeElementSwipePoints(resolution.element, direction); + if (!points.ok) { + const uiError = createUiAutomationRecoverableError({ + code: 'TARGET_NOT_ACTIONABLE', + message: points.message, + elementRef: withinElementRef, + }); + return createUiActionFailureResult(action, simulatorId, points.message, { uiError }); + } + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, toolName, }); if (guard.blockedMessage) { - return createUiActionFailureResult(baseAction, simulatorId, guard.blockedMessage); + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } const commandArgs = [ 'swipe', '--start-x', - String(x1), + String(points.from.x), '--start-y', - String(y1), + String(points.from.y), '--end-x', - String(x2), + String(points.to.x), '--end-y', - String(y2), + String(points.to.y), ]; if (duration !== undefined) { commandArgs.push('--duration', String(duration)); } - if (delta !== undefined) { - commandArgs.push('--delta', 
String(delta)); + if (distance !== undefined) { + commandArgs.push('--delta', String(distance)); } if (preDelay !== undefined) { commandArgs.push('--pre-delay', String(preDelay)); @@ -110,26 +130,33 @@ export function createSwipeExecutor( commandArgs.push('--post-delay', String(postDelay)); } - const optionsText = duration ? ` duration=${duration}s` : ''; + const optionsText = duration !== undefined ? ` duration=${duration}s` : ''; log( 'info', - `${LOG_PREFIX}/${toolName}: Starting swipe (${x1},${y1})->(${x2},${y2})${optionsText} on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting ${direction} swipe within ${withinElementRef}${optionsText} on ${simulatorId}`, ); try { await executeAxeCommand(commandArgs, simulatorId, 'swipe', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(fullAction, simulatorId, [ - guard.warningText, - getSnapshotUiWarning(simulatorId), - ]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => 'Failed to simulate swipe.', + axeFailureMessage: () => + `Failed to simulate ${direction} swipe within ${withinElementRef}.`, }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); - return createUiActionFailureResult(baseAction, simulatorId, failure.message, { + return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef: withinElementRef, + }), }); } }; diff --git a/src/mcp/tools/ui-automation/tap.ts b/src/mcp/tools/ui-automation/tap.ts index 960d8bc7f..245d7ead2 100644 --- 
a/src/mcp/tools/ui-automation/tap.ts +++ b/src/mcp/tools/ui-automation/tap.ts @@ -11,8 +11,12 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; -import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { defaultAxeHelpers } from './shared/axe-command.ts'; +import { + createSemanticTapCommand, + executeSemanticTapWithAmbiguityFallback, +} from './shared/semantic-tap.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; export type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -20,95 +24,42 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; -const baseTapSchema = z.object({ +const tapSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z - .number() - .int({ message: 'X coordinate must be an integer' }) - .optional() - .describe( - 'Fallback tap X coordinate. Prefer label/id targeting first; use coordinates when accessibility targeting is unavailable.', - ), - y: z - .number() - .int({ message: 'Y coordinate must be an integer' }) - .optional() - .describe( - 'Fallback tap Y coordinate. 
Prefer label/id targeting first; use coordinates when accessibility targeting is unavailable.', - ), - id: z - .string() - .min(1, { message: 'Id must be non-empty' }) - .optional() - .describe('Recommended tap target: accessibility element id (AXUniqueId).'), - label: z - .string() - .min(1, { message: 'Label must be non-empty' }) - .optional() - .describe('Recommended when unique: accessibility label (AXLabel).'), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), preDelay: z .number() .min(0, { message: 'Pre-delay must be non-negative' }) + .max(10, { message: 'Pre-delay must be at most 10 seconds' }) .optional() .describe('seconds'), postDelay: z .number() .min(0, { message: 'Post-delay must be non-negative' }) + .max(10, { message: 'Post-delay must be at most 10 seconds' }) .optional() .describe('seconds'), }); -const tapSchema = baseTapSchema.superRefine((values, ctx) => { - const hasX = values.x !== undefined; - const hasY = values.y !== undefined; - const hasId = values.id !== undefined; - const hasLabel = values.label !== undefined; - - if (!hasX && !hasY && hasId && hasLabel) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['id'], - message: 'Provide either id or label, not both.', - }); - } - - if (hasX !== hasY) { - if (!hasX) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['x'], - message: 'X coordinate is required when y is provided.', - }); - } - if (!hasY) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['y'], - message: 'Y coordinate is required when x is provided.', - }); - } - } - - if (!hasX && !hasY && !hasId && !hasLabel) { - ctx.addIssue({ - code: z.ZodIssueCode.custom, - path: ['x'], - message: 'Provide an element id/label (recommended) or x/y coordinates as fallback.', - }); - } -}); - type TapParams = z.infer; type TapResult = UiActionResultDomainResult; -const publicSchemaObject = z.strictObject(baseTapSchema.omit({ simulatorId: true } as const).shape); +const publicSchemaObject = 
z.strictObject(tapSchema.omit({ simulatorId: true } as const).shape); const LOG_PREFIX = '[AXe]'; +function delayMs(durationMs: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, durationMs); + }); +} + export function createTapExecutor( executor: CommandExecutor, axeHelpers: AxeHelpers = defaultAxeHelpers, @@ -116,15 +67,15 @@ export function createTapExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'tap'; - const { simulatorId, x, y, id, label, preDelay, postDelay } = params; - const action = - x !== undefined && y !== undefined - ? { type: 'tap' as const, x, y } - : id !== undefined - ? { type: 'tap' as const, id } - : label !== undefined - ? { type: 'tap' as const, label } - : { type: 'tap' as const }; + const { simulatorId, elementRef, preDelay, postDelay } = params; + const action = { type: 'tap' as const, elementRef }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'tap'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, @@ -135,55 +86,57 @@ export function createTapExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - let targetDescription = ''; - let actionDescription = ''; - let usesCoordinates = false; - const commandArgs = ['tap']; - - if (x !== undefined && y !== undefined) { - usesCoordinates = true; - targetDescription = `(${x}, ${y})`; - actionDescription = `Tap at ${targetDescription}`; - commandArgs.push('-x', String(x), '-y', String(y)); - } else if (id !== undefined) { - targetDescription = `element id "${id}"`; - actionDescription = `Tap on ${targetDescription}`; - commandArgs.push('--id', id); - } else if (label !== undefined) { - targetDescription = `element label "${label}"`; - actionDescription = `Tap on ${targetDescription}`; - 
commandArgs.push('--label', label); - } else { - return createUiActionFailureResult( - action, - simulatorId, - 'Parameter validation failed: Missing tap target', - ); - } - - if (preDelay !== undefined) { - commandArgs.push('--pre-delay', String(preDelay)); + const usesTouchActivation = resolution.element.publicElement.role === 'switch'; + const extraArgs: string[] = []; + if (!usesTouchActivation && preDelay !== undefined) { + extraArgs.push('--pre-delay', String(preDelay)); } - if (postDelay !== undefined) { - commandArgs.push('--post-delay', String(postDelay)); + if (!usesTouchActivation && postDelay !== undefined) { + extraArgs.push('--post-delay', String(postDelay)); } - - log('info', `${LOG_PREFIX}/${toolName}: Starting for ${targetDescription} on ${simulatorId}`); + const tapCommand = createSemanticTapCommand( + resolution.element, + elementRef, + extraArgs, + resolution.snapshot.elements, + ); + + log( + 'info', + `${LOG_PREFIX}/${toolName}: Starting for ${tapCommand.targetDescription} on ${simulatorId}`, + ); try { - await executeAxeCommand(commandArgs, simulatorId, 'tap', executor, axeHelpers); + if (usesTouchActivation && preDelay !== undefined) { + await delayMs(preDelay * 1000); + } + await executeSemanticTapWithAmbiguityFallback({ + command: tapCommand, + simulatorId, + executor, + axeHelpers, + }); + if (usesTouchActivation && postDelay !== undefined) { + await delayMs(postDelay * 1000); + } + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(action, simulatorId, [ - guard.warningText, - usesCoordinates ? 
getSnapshotUiWarning(simulatorId) : null, - ]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => `Failed to simulate ${actionDescription.toLowerCase()}.`, + axeFailureMessage: () => `Failed to simulate tap on elementRef ${elementRef}.`, }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: tapCommand.usedSelector ? 'UI_STATE_CHANGED' : 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } }; @@ -204,7 +157,7 @@ export async function tapLogic( export const schema = getSessionAwareToolSchemaShape({ sessionAware: publicSchemaObject, - legacy: baseTapSchema, + legacy: tapSchema, }); export const handler = createSessionAwareTool({ diff --git a/src/mcp/tools/ui-automation/touch.ts b/src/mcp/tools/ui-automation/touch.ts index 650dce8e2..277bbb53b 100644 --- a/src/mcp/tools/ui-automation/touch.ts +++ b/src/mcp/tools/ui-automation/touch.ts @@ -1,8 +1,7 @@ /** * UI Testing Plugin: Touch * - * Perform touch down/up events at specific coordinates. - * Use snapshot_ui for precise coordinates (don't guess from screenshots). + * Performs touch down/up events on a semantic UI element from the runtime snapshot store. 
*/ import * as z from 'zod'; @@ -18,7 +17,8 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; -import { getSnapshotUiWarning } from './shared/snapshot-ui-state.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; +import { getRuntimeElementActivationPoint } from './shared/runtime-snapshot.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; @@ -26,27 +26,43 @@ import type { UiActionResultDomainResult } from '../../../types/domain-results.t import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; -const touchSchema = z.object({ +const touchSchemaObject = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), - x: z.number().int({ message: 'X coordinate must be an integer' }), - y: z.number().int({ message: 'Y coordinate must be an integer' }), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), down: z.boolean().optional(), up: z.boolean().optional(), delay: z .number() .min(0, { message: 'Delay must be non-negative' }) + .max(10, { message: 'Delay must be at most 10 seconds' }) .optional() .describe('seconds'), }); -type TouchParams = z.infer; +function refineTouchDelay(value: z.infer, ctx: z.RefinementCtx): void { + if (value.delay !== undefined && !(value.down === true && value.up === true)) { + ctx.addIssue({ + code: 'custom', + path: ['delay'], + message: 'Delay can only be used when both down and up are true', + }); + } +} + +const touchSchema = touchSchemaObject.superRefine(refineTouchDelay); + +type TouchParams = z.infer; type TouchResult = UiActionResultDomainResult; -const 
publicSchemaObject = z.strictObject(touchSchema.omit({ simulatorId: true } as const).shape); +const publicSchemaObject = z.strictObject( + touchSchemaObject.omit({ simulatorId: true } as const).shape, +); const LOG_PREFIX = '[AXe]'; @@ -57,29 +73,41 @@ export function createTouchExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'touch'; - const { simulatorId, x, y, down, up, delay } = params; - const actionText = down && up ? 'touch down+up' : down ? 'touch down' : 'touch up'; - const baseAction = { type: 'touch' as const }; - const fullAction = { type: 'touch' as const, event: actionText, x, y }; + const { simulatorId, elementRef, down, up, delay } = params; + const actionText = + down && up ? 'touch down+up' : down ? 'touch down' : up ? 'touch up' : undefined; + const action = { + type: 'touch' as const, + elementRef, + ...(actionText ? { event: actionText } : {}), + }; if (!down && !up) { return createUiActionFailureResult( - baseAction, + action, simulatorId, 'At least one of "down" or "up" must be true', ); } + const resolution = resolveElementRef(simulatorId, elementRef, 'touch'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } + const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, simulatorId, toolName, }); if (guard.blockedMessage) { - return createUiActionFailureResult(baseAction, simulatorId, guard.blockedMessage); + return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const commandArgs = ['touch', '-x', String(x), '-y', String(y)]; + const center = getRuntimeElementActivationPoint(resolution.element); + const commandArgs = ['touch', '-x', String(center.x), '-y', String(center.y)]; if (down) { commandArgs.push('--down'); } @@ -92,23 +120,29 @@ export function createTouchExecutor( log( 'info', - `${LOG_PREFIX}/${toolName}: Starting ${actionText} at (${x}, ${y}) 
on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting ${actionText ?? 'touch'} on elementRef ${elementRef} on ${simulatorId}`, ); try { await executeAxeCommand(commandArgs, simulatorId, 'touch', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); - return createUiActionSuccessResult(fullAction, simulatorId, [ - guard.warningText, - getSnapshotUiWarning(simulatorId), - ]); + return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { axeFailureMessage: () => 'Failed to execute touch event.', }); log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); - return createUiActionFailureResult(baseAction, simulatorId, failure.message, { + return createUiActionFailureResult(action, simulatorId, failure.message, { details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } }; @@ -129,7 +163,7 @@ export async function touchLogic( export const schema = getSessionAwareToolSchemaShape({ sessionAware: publicSchemaObject, - legacy: touchSchema, + legacy: touchSchemaObject, }); export const handler = createSessionAwareTool({ diff --git a/src/mcp/tools/ui-automation/type_text.ts b/src/mcp/tools/ui-automation/type_text.ts index a18c09208..d4999df97 100644 --- a/src/mcp/tools/ui-automation/type_text.ts +++ b/src/mcp/tools/ui-automation/type_text.ts @@ -1,8 +1,7 @@ /** * UI Testing Plugin: Type Text * - * Types text into the iOS Simulator using keyboard input. - * Supports standard US keyboard characters. + * Types text into a semantic UI element from the runtime snapshot store. 
*/ import * as z from 'zod'; @@ -18,22 +17,47 @@ import { getHandlerContext, toInternalSchema, } from '../../../utils/typed-tool-factory.ts'; +import { clearRuntimeSnapshot, resolveElementRef } from './shared/snapshot-ui-state.ts'; import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import { + createSemanticTapCommand, + executeSemanticTapWithAmbiguityFallback, +} from './shared/semantic-tap.ts'; import type { AxeHelpers } from './shared/axe-command.ts'; import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; import type { UiActionResultDomainResult } from '../../../types/domain-results.ts'; import { createUiActionFailureResult, createUiActionSuccessResult, + createUiAutomationRecoverableError, mapAxeCommandError, setUiActionStructuredOutput, + shouldInvalidateRuntimeSnapshotAfterActionError, } from './shared/domain-result.ts'; const LOG_PREFIX = '[AXe]'; +const AXE_UNSUPPORTED_TEXT_MESSAGE = + 'Text contains characters unsupported by AXe typing. 
AXe type supports US keyboard characters only.'; + +function containsUnsupportedAxeTypeText(text: string): boolean { + for (const character of text) { + const codePoint = character.codePointAt(0); + if (codePoint === undefined || codePoint < 0x20 || codePoint > 0x7e) { + return true; + } + } + + return false; +} const typeTextSchema = z.object({ simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }), text: z.string().min(1, { message: 'Text cannot be empty' }), + replaceExisting: z + .boolean() + .optional() + .describe('Select and replace existing field contents before typing'), }); type TypeTextParams = z.infer; @@ -50,8 +74,15 @@ export function createTypeTextExecutor( ): NonStreamingExecutor { return async (params) => { const toolName = 'type_text'; - const { simulatorId, text } = params; - const action = { type: 'type-text' as const }; + const { simulatorId, elementRef, text, replaceExisting } = params; + const action = { type: 'type-text' as const, elementRef, textLength: text.length }; + + const resolution = resolveElementRef(simulatorId, elementRef, 'typeText'); + if (!resolution.ok) { + return createUiActionFailureResult(action, simulatorId, resolution.error.message, { + uiError: resolution.error, + }); + } const guard = await guardUiAutomationAgainstStoppedDebugger({ debugger: debuggerManager, @@ -62,24 +93,82 @@ export function createTypeTextExecutor( return createUiActionFailureResult(action, simulatorId, guard.blockedMessage); } - const commandArgs = ['type', text]; + if (containsUnsupportedAxeTypeText(text)) { + return createUiActionFailureResult(action, simulatorId, AXE_UNSUPPORTED_TEXT_MESSAGE, { + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: AXE_UNSUPPORTED_TEXT_MESSAGE, + recoveryHint: 'Use only US keyboard characters supported by AXe type.', + elementRef, + }), + }); + } + + const focusCommand = createSemanticTapCommand( + 
resolution.element, + elementRef, + [], + resolution.snapshot.elements, + ); + const typeCommandArgs = ['type', text]; log( 'info', - `${LOG_PREFIX}/${toolName}: Starting type "${text.substring(0, 20)}..." on ${simulatorId}`, + `${LOG_PREFIX}/${toolName}: Starting type into elementRef ${elementRef}, length=${text.length} on ${simulatorId}`, ); try { - await executeAxeCommand(commandArgs, simulatorId, 'type', executor, axeHelpers); + await executeSemanticTapWithAmbiguityFallback({ + command: focusCommand, + simulatorId, + executor, + axeHelpers, + }); + } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => `Failed to focus elementRef ${elementRef} before typing.`, + }); + log('error', `${LOG_PREFIX}/${toolName}: Focus failed - ${failure.message}`); + return createUiActionFailureResult(action, simulatorId, failure.message, { + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), + }); + } + + try { + if (replaceExisting === true) { + await executeAxeCommand( + ['key-combo', '--modifiers', '227', '--key', '4'], + simulatorId, + 'key-combo', + executor, + axeHelpers, + ); + } + await executeAxeCommand(typeCommandArgs, simulatorId, 'type', executor, axeHelpers); + clearRuntimeSnapshot(simulatorId); log('info', `${LOG_PREFIX}/${toolName}: Success for ${simulatorId}`); return createUiActionSuccessResult(action, simulatorId, [guard.warningText]); } catch (error) { + if (shouldInvalidateRuntimeSnapshotAfterActionError(error)) { + clearRuntimeSnapshot(simulatorId); + } const failure = mapAxeCommandError(error, { - axeFailureMessage: () => 'Failed to simulate text typing.', + axeFailureMessage: () => `Failed to type text into elementRef ${elementRef}.`, }); - log('error', `${LOG_PREFIX}/${toolName}: Failed - ${failure.message}`); + log('error', `${LOG_PREFIX}/${toolName}: 
Typing failed - ${failure.message}`); return createUiActionFailureResult(action, simulatorId, failure.message, { - details: failure.diagnostics?.errors.map((entry) => entry.message), + uiError: createUiAutomationRecoverableError({ + code: 'ACTION_FAILED', + message: failure.message, + elementRef, + }), }); } }; diff --git a/src/mcp/tools/ui-automation/wait_for_ui.ts b/src/mcp/tools/ui-automation/wait_for_ui.ts new file mode 100644 index 000000000..3fb55ccbc --- /dev/null +++ b/src/mcp/tools/ui-automation/wait_for_ui.ts @@ -0,0 +1,365 @@ +import * as z from 'zod'; +import { log } from '../../../utils/logging/index.ts'; +import type { CommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultCommandExecutor } from '../../../utils/execution/index.ts'; +import { getDefaultDebuggerManager } from '../../../utils/debugger/index.ts'; +import type { DebuggerManager } from '../../../utils/debugger/debugger-manager.ts'; +import { guardUiAutomationAgainstStoppedDebugger } from '../../../utils/debugger/ui-automation-guard.ts'; +import { + createSessionAwareTool, + getSessionAwareToolSchemaShape, + getHandlerContext, + toInternalSchema, +} from '../../../utils/typed-tool-factory.ts'; +import type { CaptureResultDomainResult } from '../../../types/domain-results.ts'; +import type { NonStreamingExecutor } from '../../../types/tool-execution.ts'; +import type { + RuntimeElementRoleV1, + RuntimeElementV1, + RuntimeSnapshotRecord, + UiWaitMatch, +} from '../../../types/ui-snapshot.ts'; +import { executeAxeCommand, defaultAxeHelpers } from './shared/axe-command.ts'; +import type { AxeHelpers } from './shared/axe-command.ts'; +import { clearRuntimeSnapshot, recordRuntimeSnapshot } from './shared/snapshot-ui-state.ts'; +import { + parseRuntimeSnapshotResponse, + RuntimeSnapshotParseError, +} from './shared/runtime-snapshot.ts'; +import { + createCaptureFailureResult, + createCaptureSuccessResult, + mapAxeCommandError, + setCaptureStructuredOutput, +} from 
'./shared/domain-result.ts'; +import { + createWaitTimeoutError, + evaluateElementPredicate, + evaluateSettledPredicate, + evaluateTextContainsPredicate, + hasSelectorFields, + resolveElementSelector, + selectorFromParams, + waitPredicates, +} from './shared/wait-predicate.ts'; +import type { ResolvedWaitSelector, SettledTracker } from './shared/wait-predicate.ts'; + +const DEFAULT_TIMEOUT_MS = 5_000; +const DEFAULT_POLL_INTERVAL_MS = 250; +const DEFAULT_SETTLED_DURATION_MS = 500; +const LOG_PREFIX = '[AXe]'; + +const waitForUiSchemaShape = { + simulatorId: z.uuid({ message: 'Invalid Simulator UUID format' }), + predicate: z.enum(waitPredicates), + elementRef: z.string().min(1, { message: 'elementRef must be non-empty' }).optional(), + identifier: z.string().min(1, { message: 'identifier must be non-empty' }).optional(), + label: z.string().min(1, { message: 'label must be non-empty' }).optional(), + role: z + .enum([ + 'application', + 'button', + 'cell', + 'image', + 'keyboard-key', + 'list', + 'menu', + 'other', + 'scroll-view', + 'slider', + 'switch', + 'tab', + 'text', + 'text-field', + 'window', + ] satisfies RuntimeElementRoleV1[]) + .optional(), + value: z.string().min(1, { message: 'value must be non-empty' }).optional(), + text: z + .string() + .min(1, { message: 'text must be non-empty' }) + .refine((value) => value.replace(/\s+/g, ' ').trim().length > 0, { + message: 'text must contain non-whitespace characters', + }) + .optional(), + timeoutMs: z + .number() + .int({ message: 'timeoutMs must be an integer number of milliseconds' }) + .min(0, { message: 'timeoutMs must be non-negative' }) + .optional() + .describe('milliseconds'), + pollIntervalMs: z + .number() + .int({ message: 'pollIntervalMs must be an integer number of milliseconds' }) + .min(1, { message: 'pollIntervalMs must be at least 1 millisecond' }) + .optional() + .describe('milliseconds'), + settledDurationMs: z + .number() + .int({ message: 'settledDurationMs must be an integer number of 
milliseconds' }) + .min(0, { message: 'settledDurationMs must be non-negative' }) + .optional() + .describe('milliseconds'), +}; + +const waitForUiSchema = z.strictObject(waitForUiSchemaShape).superRefine((value, ctx) => { + if ( + value.predicate !== 'settled' && + value.predicate !== 'textContains' && + !hasSelectorFields(value) + ) { + ctx.addIssue({ + code: 'custom', + path: ['elementRef'], + message: `${value.predicate} waits require at least one selector field`, + }); + } + + if (value.predicate === 'textContains' && value.text === undefined) { + ctx.addIssue({ + code: 'custom', + path: ['text'], + message: 'textContains waits require text', + }); + } + + if (value.predicate !== 'textContains' && value.text !== undefined) { + ctx.addIssue({ + code: 'custom', + path: ['text'], + message: 'text is only supported for textContains waits', + }); + } +}); + +type WaitForUiParams = z.infer; +type WaitForUiResult = CaptureResultDomainResult; + +interface WaitTiming { + now: () => number; + sleep: (durationMs: number) => Promise; +} + +function defaultSleep(durationMs: number): Promise { + return new Promise((resolve) => { + setTimeout(resolve, durationMs); + }); +} + +function createWaitMatch( + predicate: WaitForUiParams['predicate'], + matches: RuntimeElementV1[] | undefined, +): UiWaitMatch | undefined { + if (predicate === 'settled' || matches === undefined) { + return undefined; + } + return { predicate, matches }; +} + +export function createWaitForUiExecutor( + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + timing: WaitTiming = { now: Date.now, sleep: defaultSleep }, +): NonStreamingExecutor { + return async (params) => { + const toolName = 'wait_for_ui'; + const { simulatorId, predicate, elementRef, text } = params; + const timeoutMs = params.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const pollIntervalMs = params.pollIntervalMs ?? 
DEFAULT_POLL_INTERVAL_MS; + const settledDurationMs = params.settledDurationMs ?? DEFAULT_SETTLED_DURATION_MS; + const startedAtMs = timing.now(); + const deadlineMs = startedAtMs + timeoutMs; + let selector: ResolvedWaitSelector | null = null; + if (predicate !== 'settled') { + if (elementRef) { + const selectorResolution = resolveElementSelector(simulatorId, elementRef, startedAtMs); + if (!selectorResolution.ok) { + return createCaptureFailureResult(simulatorId, selectorResolution.error.message, { + uiError: selectorResolution.error, + }); + } + selector = selectorResolution.selector; + } else { + selector = selectorFromParams(params); + } + } + + if (predicate !== 'settled' && predicate !== 'textContains' && !selector) { + const message = `${predicate} waits require at least one selector field.`; + return createCaptureFailureResult(simulatorId, message, { + uiError: { + code: 'TARGET_NOT_FOUND', + message, + recoveryHint: + 'Provide elementRef, identifier, label, role, or value, or use settled for selector-free waits.', + }, + }); + } + + const guard = await guardUiAutomationAgainstStoppedDebugger({ + debugger: debuggerManager, + simulatorId, + toolName, + }); + if (guard.blockedMessage) { + clearRuntimeSnapshot(simulatorId); + return createCaptureFailureResult(simulatorId, guard.blockedMessage); + } + + let latestSnapshot: RuntimeSnapshotRecord | null = null; + let latestCandidates: RuntimeElementV1[] = []; + let lastParseError: RuntimeSnapshotParseError | null = null; + let lastPollError: string | null = null; + const settledTracker: SettledTracker = { signature: null, stableSinceMs: null }; + + log('info', `${LOG_PREFIX}/${toolName}: Waiting for ${predicate} on ${simulatorId}`); + + while (true) { + try { + const responseText = await executeAxeCommand( + ['describe-ui'], + simulatorId, + 'describe-ui', + executor, + axeHelpers, + ); + const nowMs = timing.now(); + const snapshot = parseRuntimeSnapshotResponse({ simulatorId, responseText, nowMs }); + 
latestSnapshot = snapshot; + lastParseError = null; + lastPollError = null; + recordRuntimeSnapshot(snapshot); + + const matched = + predicate === 'settled' + ? evaluateSettledPredicate({ + snapshot, + nowMs, + settledDurationMs, + tracker: settledTracker, + }) + : predicate === 'textContains' && !selector + ? evaluateTextContainsPredicate({ snapshot, text: text! }) + : evaluateElementPredicate({ predicate, selector: selector!, snapshot, text }); + + if (typeof matched === 'boolean') { + if (matched) { + return createCaptureSuccessResult(simulatorId, { + capture: snapshot.payload, + warnings: [guard.warningText], + }); + } + } else { + latestCandidates = matched.candidates ?? []; + if (matched.uiError) { + return createCaptureFailureResult(simulatorId, matched.uiError.message, { + warnings: [guard.warningText], + uiError: matched.uiError, + capture: snapshot.payload, + }); + } + if (matched.matched) { + return createCaptureSuccessResult(simulatorId, { + capture: snapshot.payload, + warnings: [guard.warningText], + waitMatch: createWaitMatch(predicate, matched.candidates), + }); + } + } + } catch (error) { + if (error instanceof RuntimeSnapshotParseError) { + lastParseError = error; + lastPollError = null; + } else { + const failure = mapAxeCommandError(error, { + axeFailureMessage: () => 'Failed to poll runtime UI snapshot.', + }); + lastPollError = failure.message; + lastParseError = null; + } + } + + const nowMs = timing.now(); + if (nowMs >= deadlineMs) { + break; + } + + await timing.sleep(Math.min(pollIntervalMs, deadlineMs - nowMs)); + } + + if (latestSnapshot) { + const uiError = createWaitTimeoutError({ + predicate, + timeoutMs, + selector: selector ?? 
undefined, + candidates: latestCandidates, + }); + return createCaptureFailureResult(simulatorId, uiError.message, { + warnings: [guard.warningText], + uiError, + capture: latestSnapshot.payload, + }); + } + + clearRuntimeSnapshot(simulatorId); + if (lastParseError) { + const message = 'Failed to parse runtime UI snapshot while waiting for UI.'; + return createCaptureFailureResult(simulatorId, message, { + details: [lastParseError.message], + uiError: { + code: 'SNAPSHOT_PARSE_FAILED', + message, + recoveryHint: 'Retry after the app is fully launched and responsive.', + }, + }); + } + + const message = + lastPollError ?? `Timed out after ${timeoutMs}ms waiting for UI predicate '${predicate}'.`; + return createCaptureFailureResult(simulatorId, message, { + uiError: { + code: lastPollError ? 'ACTION_FAILED' : 'WAIT_TIMEOUT', + message, + recoveryHint: 'Retry after the app is fully launched and responsive.', + ...(lastPollError ? {} : { timeoutMs }), + }, + }); + }; +} + +export async function wait_for_uiLogic( + params: WaitForUiParams, + executor: CommandExecutor, + axeHelpers: AxeHelpers = defaultAxeHelpers, + debuggerManager: DebuggerManager = getDefaultDebuggerManager(), + timing?: WaitTiming, +): Promise { + const ctx = getHandlerContext(); + const executeWaitForUi = createWaitForUiExecutor(executor, axeHelpers, debuggerManager, timing); + const result = await executeWaitForUi(params); + + setCaptureStructuredOutput(ctx, result, { headerTitle: 'Wait for UI' }); + + ctx.nextStepParams = { + snapshot_ui: { simulatorId: params.simulatorId }, + wait_for_ui: { simulatorId: params.simulatorId, predicate: 'settled' }, + }; +} + +const publicSchemaObject = z.strictObject( + z.object(waitForUiSchemaShape).omit({ simulatorId: true } as const).shape, +); + +export const schema = getSessionAwareToolSchemaShape({ + sessionAware: publicSchemaObject, + legacy: waitForUiSchema, +}); + +export const handler = createSessionAwareTool({ + internalSchema: 
toInternalSchema(waitForUiSchema), + logicFunction: (params: WaitForUiParams, executor: CommandExecutor) => + wait_for_uiLogic(params, executor, defaultAxeHelpers), + getExecutor: getDefaultCommandExecutor, + requirements: [{ allOf: ['simulatorId'], message: 'simulatorId is required' }], +}); diff --git a/src/rendering/render.ts b/src/rendering/render.ts index f9eabae56..c00fac689 100644 --- a/src/rendering/render.ts +++ b/src/rendering/render.ts @@ -97,6 +97,7 @@ function createRenderHooks( runtime?: FilePathRenderRuntime; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; }, ): RenderSessionHooks { const suppressWarnings = sessionStore.get('suppressWarnings'); @@ -120,6 +121,7 @@ function createRenderHooks( showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps: options.includeNextSteps ?? true, }), }; case 'raw': @@ -146,6 +148,7 @@ function createRenderHooks( showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps: options.includeNextSteps ?? true, }); if (text) { process.stdout.write(text); @@ -160,6 +163,7 @@ function createRenderHooks( showTestTiming, filePathRenderStyle, includeHeaderDetails, + includeNextSteps: options.includeNextSteps ?? 
true, }); return { @@ -180,6 +184,7 @@ export interface RenderSessionOptions { runtime?: FilePathRenderRuntime; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; } export function createRenderSession( @@ -194,7 +199,10 @@ export function createRenderSession( export function renderTranscript( input: RenderTranscriptInput, strategy: RenderStrategy, - options?: Pick, + options?: Pick< + RenderSessionOptions, + 'runtime' | 'filePathRenderStyle' | 'includeHeaderDetails' | 'includeNextSteps' + >, ): string { return createRenderHooks(strategy, { ...options, interactive: false }).finalize(input); } diff --git a/src/runtime/__tests__/tool-invoker.test.ts b/src/runtime/__tests__/tool-invoker.test.ts index 397a2fdd8..32ba1fed1 100644 --- a/src/runtime/__tests__/tool-invoker.test.ts +++ b/src/runtime/__tests__/tool-invoker.test.ts @@ -664,6 +664,50 @@ describe('DefaultToolInvoker next steps post-processing', () => { expect(text).toContain('xcodebuildmcp ui-automation screenshot --simulator-id "123"'); }); + it('prefers the current workflow when normalizing duplicate next-step tool names', async () => { + const directHandler = emitNextStepsHandler('ok', [ + { + tool: 'screenshot', + label: 'Take screenshot', + params: { simulatorId: '123' }, + }, + ]); + + const catalog = createToolCatalog([ + makeTool({ + id: 'snapshot_ui', + cliName: 'snapshot-ui', + mcpName: 'snapshot_ui', + workflow: 'ui-automation', + stateful: false, + handler: directHandler, + }), + makeTool({ + id: 'screenshot', + cliName: 'screenshot', + mcpName: 'screenshot', + workflow: 'simulator', + stateful: false, + handler: emitHandler('simulator screenshot'), + }), + makeTool({ + id: 'screenshot', + cliName: 'screenshot', + mcpName: 'screenshot', + workflow: 'ui-automation', + stateful: false, + handler: emitHandler('ui screenshot'), + }), + ]); + + const invoker = new DefaultToolInvoker(catalog); + const response = await invokeAndFinalize(invoker, 
'snapshot-ui', {}, { runtime: 'cli' }); + + const text = response.content.map((c) => (c.type === 'text' ? c.text : '')).join('\n'); + expect(text).toContain('xcodebuildmcp ui-automation screenshot --simulator-id "123"'); + expect(text).not.toContain('xcodebuildmcp simulator screenshot --simulator-id "123"'); + }); + it('injects manifest template next steps from dynamic nextStepParams when response omits nextSteps', async () => { const directHandler = emitNextStepsHandler('ok', undefined, { snapshot_ui: { simulatorId: '12345678-1234-4234-8234-123456789012' }, diff --git a/src/runtime/tool-invoker.ts b/src/runtime/tool-invoker.ts index 75935866c..1f82f2abe 100644 --- a/src/runtime/tool-invoker.ts +++ b/src/runtime/tool-invoker.ts @@ -135,13 +135,31 @@ function mergeTemplateAndResponseNextSteps( }); } -function normalizeNextSteps(steps: NextStep[], catalog: ToolCatalog): NextStep[] { +function getNextStepTarget(params: { + catalog: ToolCatalog; + mcpName: string; + preferredWorkflow: string; +}): ToolDefinition | null { + return ( + params.catalog.tools.find( + (tool) => + tool.mcpName.toLowerCase() === params.mcpName.toLowerCase().trim() && + tool.workflow === params.preferredWorkflow, + ) ?? 
params.catalog.getByMcpName(params.mcpName) + ); +} + +function normalizeNextSteps( + steps: NextStep[], + catalog: ToolCatalog, + preferredWorkflow: string, +): NextStep[] { return steps.map((step) => { if (!step.tool) { return step; } - const target = catalog.getByMcpName(step.tool); + const target = getNextStepTarget({ catalog, mcpName: step.tool, preferredWorkflow }); if (!target) { return step; } @@ -238,7 +256,7 @@ export function postProcessSession(params: { return; } - const normalized = normalizeNextSteps(finalSteps, catalog); + const normalized = normalizeNextSteps(finalSteps, catalog, tool.workflow); if (normalized.length > 0) { session.setNextSteps?.(normalized, runtime); diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/long-press--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/long-press--error-no-simulator.txt index b04be39af..adaba8754 100644 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/long-press--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/long-press--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate long press at (100, 400). +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/long-press--success.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/long-press--success.txt index 678f04137..dfce2922b 100644 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/long-press--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/long-press--success.txt @@ -3,8 +3,4 @@ Simulator: -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. 
- -✅ Long press at (100, 400) for 500ms simulated successfully. +✅ Long press on elementRef e3 for 500ms simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/snapshot-ui--success.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/snapshot-ui--success.txt index 4e62b4e39..0dc63b95f 100644 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/snapshot-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/snapshot-ui--success.txt @@ -3,586 +3,36 @@ Simulator: -Accessibility Hierarchy - ```json - [ - { - "AXFrame" : "{{0, 0}, {402, 874}}", - "AXUniqueId" : null, - "frame" : { - "y" : 0, - "x" : 0, - "width" : 402, - "height" : 874 - }, - "role_description" : "application", - "AXLabel" : "Calculator", - "content_required" : false, - "type" : "Application", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXApplication", - "children" : [ - { - "AXFrame" : "{{344, 250.5}, {34, 67}}", - "AXUniqueId" : null, - "frame" : { - "y" : 250.5, - "x" : 344, - "width" : 34, - "height" : 67 - }, - "role_description" : "text", - "AXLabel" : "0", - "content_required" : false, - "type" : "StaticText", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXStaticText", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "C", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 113.2, - "width" : 82.3, - 
"height" : 81 - }, - "role_description" : "button", - "AXLabel" : "±", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "%", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "÷", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "7", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "8", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - 
"enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "9", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "×", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "4", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "5", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 206.5, 
- "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "6", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "-", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "1", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "2", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "3", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - 
"AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "+", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "0", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 725.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : ".", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "=", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - } - ], - "subrole" : null, - "pid" : - } - ] - ``` +Targets (19) — ref|action|role|label|id + 
e3|tap|button|C| + e4|tap|button|±| + e5|tap|button|%| + e6|tap|button|÷| + e7|tap|button|7| + e8|tap|button|8| + e9|tap|button|9| + e10|tap|button|×| + e11|tap|button|4| + e12|tap|button|5| + e13|tap|button|6| + e14|tap|button|-| + e15|tap|button|1| + e16|tap|button|2| + e17|tap|button|3| + e18|tap|button|+| + e19|tap|button|0| + e20|tap|button|.| + e21|tap|button|=| Tips - - Use frame coordinates for tap/swipe (center: x+width/2, y+height/2) - - If a debugger is attached, ensure the app is running (not stopped on breakpoints) - - Screenshots are for visual verification only + - Use target refs with tap/type_text. + - Use scroll refs with swipe. + - Use wait_for_ui for text/assertions or changing UI. -✅ Accessibility hierarchy retrieved successfully. +✅ Runtime UI snapshot captured with 21 elements, 19 likely targets, and 0 scroll areas. Next steps: 1. Refresh after layout changes: xcodebuildmcp simulator snapshot-ui --simulator-id "" -2. Tap on element: xcodebuildmcp ui-automation tap --simulator-id "" --x "0" --y "0" -3. Take screenshot for verification: xcodebuildmcp simulator screenshot --simulator-id "" +2. Wait for UI to settle: xcodebuildmcp ui-automation wait-for-ui --simulator-id "SIMULATOR_UUID" --predicate "settled" +3. Tap an elementRef: xcodebuildmcp ui-automation tap --simulator-id "" --element-ref "e3" +4. Take screenshot for verification: xcodebuildmcp simulator screenshot --simulator-id "" diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--error-no-simulator.txt index 4716920bc..935b744b8 100644 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. 
- ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate swipe. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--error-not-actionable.txt new file mode 100644 index 000000000..3cdc8d292 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--error-not-actionable.txt @@ -0,0 +1,11 @@ + +👆 Swipe + + Simulator: + +Recovery + Code: TARGET_NOT_ACTIONABLE + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'swipeWithin'. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--success.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--success.txt deleted file mode 100644 index f78015c16..000000000 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/swipe--success.txt +++ /dev/null @@ -1,10 +0,0 @@ - -👆 Swipe - - Simulator: - -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Swipe from (200, 400) to (200, 200) simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/tap--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/tap--error-no-simulator.txt index d45f020a2..726bd1eb3 100644 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/tap--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/tap--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate tap at (100, 100). 
+❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/tap--success.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/tap--success.txt index d4a41a58c..bc58f3e30 100644 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/tap--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/tap--success.txt @@ -3,8 +3,4 @@ Simulator: -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Tap at (100, 400) simulated successfully. +✅ Tap on elementRef e3 simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/touch--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/touch--error-no-simulator.txt index 751d9f3fd..8a7e5ad50 100644 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/touch--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/touch--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to execute touch event. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/touch--success.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/touch--success.txt index 5197f0e74..ea972a7a7 100644 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/touch--success.txt +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/touch--success.txt @@ -3,8 +3,4 @@ Simulator: -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Touch event (touch down+up) at (100, 400) executed successfully. 
+✅ Touch event (touch down+up) on elementRef e3 executed successfully. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--error-no-simulator.txt index bba706413..ccdd1e70e 100644 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--error-no-simulator.txt @@ -3,8 +3,8 @@ Simulator: -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate text typing. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--error-not-actionable.txt new file mode 100644 index 000000000..5d72e95f4 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--error-not-actionable.txt @@ -0,0 +1,11 @@ + +⌨️ Type Text + + Simulator: + +Recovery + Code: TARGET_NOT_ACTIONABLE + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'typeText'. diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--success.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--success.txt deleted file mode 100644 index 72a6ac50b..000000000 --- a/src/snapshot-tests/__fixtures__/cli/ui-automation/type-text--success.txt +++ /dev/null @@ -1,6 +0,0 @@ - -⌨️ Type Text - - Simulator: - -✅ Text typing simulated successfully. 
diff --git a/src/snapshot-tests/__fixtures__/cli/ui-automation/wait-for-ui--success.txt b/src/snapshot-tests/__fixtures__/cli/ui-automation/wait-for-ui--success.txt new file mode 100644 index 000000000..1e74f961e --- /dev/null +++ b/src/snapshot-tests/__fixtures__/cli/ui-automation/wait-for-ui--success.txt @@ -0,0 +1,36 @@ + +⚙️ Wait for UI + + Simulator: + +Targets (19) — ref|action|role|label|id + e3|tap|button|C| + e4|tap|button|±| + e5|tap|button|%| + e6|tap|button|÷| + e7|tap|button|7| + e8|tap|button|8| + e9|tap|button|9| + e10|tap|button|×| + e11|tap|button|4| + e12|tap|button|5| + e13|tap|button|6| + e14|tap|button|-| + e15|tap|button|1| + e16|tap|button|2| + e17|tap|button|3| + e18|tap|button|+| + e19|tap|button|0| + e20|tap|button|.| + e21|tap|button|=| + +Tips + - Use target refs with tap/type_text. + - Use scroll refs with swipe. + - Use wait_for_ui for text/assertions or changing UI. + +✅ Wait completed; runtime UI snapshot refreshed with 21 elements, 19 likely targets, and 0 scroll areas. + +Next steps: +1. Refresh runtime snapshot: xcodebuildmcp simulator snapshot-ui --simulator-id "SIMULATOR_UUID" +2. 
Wait again: xcodebuildmcp ui-automation wait-for-ui --simulator-id "SIMULATOR_UUID" --predicate "settled" diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/button--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/button--error-no-simulator.json index 1318557a0..4eb88bfb3 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/button--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/button--error-no-simulator.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, "error": "Failed to press button 'home'.", "data": { diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/button--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/button--success.json index 6d617298b..7df9f68e4 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/button--success.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/button--success.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": false, "error": null, "data": { diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/gesture--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/gesture--error-no-simulator.json index 5f778f391..d83e0ab6e 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/gesture--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/gesture--error-no-simulator.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, "error": "Failed to execute gesture 'scroll-down'.", "data": { diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/gesture--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/gesture--success.json index a35585b83..39cb6883a 100644 --- 
a/src/snapshot-tests/__fixtures__/json/ui-automation/gesture--success.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/gesture--success.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": false, "error": null, "data": { diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/key-press--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/key-press--error-no-simulator.json index 309758ecf..27466bd7b 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/key-press--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/key-press--error-no-simulator.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, "error": "Failed to simulate key press (code: 4).", "data": { diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/key-press--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/key-press--success.json index a0b984d98..bbfc05f0e 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/key-press--success.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/key-press--success.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": false, "error": null, "data": { diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/key-sequence--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/key-sequence--error-no-simulator.json index 882217c5b..81e0e2fdc 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/key-sequence--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/key-sequence--error-no-simulator.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, "error": 
"Failed to execute key sequence.", "data": { diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/key-sequence--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/key-sequence--success.json index 82632eecb..4ef507f23 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/key-sequence--success.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/key-sequence--success.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": false, "error": null, "data": { diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/long-press--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/long-press--error-no-simulator.json index 433768d2c..4bef73811 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/long-press--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/long-press--error-no-simulator.json @@ -1,28 +1,24 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, - "error": "Failed to simulate long press at (100, 400).", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { "type": "long-press", - "x": 100, - "y": 400, + "elementRef": "e3", "durationMs": 500 }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." 
} } } diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/long-press--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/long-press--success.json index fa9c812f7..8621e1a67 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/long-press--success.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/long-press--success.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": false, "error": null, "data": { @@ -9,20 +9,11 @@ }, "action": { "type": "long-press", - "x": 100, - "y": 400, + "elementRef": "e3", "durationMs": 500 }, "artifacts": { "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." - } - ], - "errors": [] } } } diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/snapshot-ui--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/snapshot-ui--error-no-simulator.json index cabc3e10e..4e803b712 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/snapshot-ui--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/snapshot-ui--error-no-simulator.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.capture-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, "error": "Failed to get accessibility hierarchy.", "data": { diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/snapshot-ui--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/snapshot-ui--success.json index 4c80f145d..de6328de9 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/snapshot-ui--success.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/snapshot-ui--success.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.capture-result", - "schemaVersion": "1", + "schemaVersion": "2", 
"didError": false, "error": null, "data": { @@ -11,493 +11,34 @@ "simulatorId": "" }, "capture": { - "type": "ui-hierarchy", - "uiHierarchy": [ - { - "AXFrame": "{{0, 0}, {402, 874}}", - "AXUniqueId": null, - "frame": { - "y": 0, - "x": 0, - "width": 402, - "height": 874 - }, - "role_description": "application", - "AXLabel": "Calculator", - "content_required": false, - "type": "Application", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXApplication", - "children": [ - { - "AXFrame": "{{344, 250.5}, {34, 67}}", - "AXUniqueId": null, - "frame": { - "y": 250.5, - "x": 344, - "width": 34, - "height": 67 - }, - "role_description": "text", - "AXLabel": "0", - "content_required": false, - "type": "StaticText", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXStaticText", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 357.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { - "y": 357.5, - "x": 19.5, - "width": 82.7, - "height": 81 - }, - "role_description": "button", - "AXLabel": "C", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 357.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 357.5, - "x": 113.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "±", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 357.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { - "y": 357.5, - "x": 206.5, - "width": 82.7, - "height": 81 - }, - "role_description": "button", - "AXLabel": "%", - 
"content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 357.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 357.5, - "x": 300.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "÷", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 449.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { - "y": 449.5, - "x": 19.5, - "width": 82.7, - "height": 81 - }, - "role_description": "button", - "AXLabel": "7", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 449.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 449.5, - "x": 113.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "8", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 449.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { - "y": 449.5, - "x": 206.5, - "width": 82.7, - "height": 81 - }, - "role_description": "button", - "AXLabel": "9", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 449.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 
449.5, - "x": 300.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "×", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{19.5, 541.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { - "y": 541.5, - "x": 19.5, - "width": 82.7, - "height": 81 - }, - "role_description": "button", - "AXLabel": "4", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 541.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 541.5, - "x": 113.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "5", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 541.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { - "y": 541.5, - "x": 206.5, - "width": 82.7, - "height": 81 - }, - "role_description": "button", - "AXLabel": "6", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 541.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 541.5, - "x": 300.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "-", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - 
"pid": 99999 - }, - { - "AXFrame": "{{19.5, 633.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { - "y": 633.5, - "x": 19.5, - "width": 82.7, - "height": 81 - }, - "role_description": "button", - "AXLabel": "1", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 633.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 633.5, - "x": 113.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "2", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 633.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { - "y": 633.5, - "x": 206.5, - "width": 82.7, - "height": 81 - }, - "role_description": "button", - "AXLabel": "3", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 633.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 633.5, - "x": 300.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "+", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{113.2, 725.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 725.5, - "x": 113.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "0", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - 
"custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{206.5, 725.5}, {82.7, 81}}", - "AXUniqueId": null, - "frame": { - "y": 725.5, - "x": 206.5, - "width": 82.7, - "height": 81 - }, - "role_description": "button", - "AXLabel": ".", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - }, - { - "AXFrame": "{{300.2, 725.5}, {82.3, 81}}", - "AXUniqueId": null, - "frame": { - "y": 725.5, - "x": 300.2, - "width": 82.3, - "height": 81 - }, - "role_description": "button", - "AXLabel": "=", - "content_required": false, - "type": "Button", - "title": null, - "help": null, - "custom_actions": [], - "AXValue": null, - "enabled": true, - "role": "AXButton", - "children": [], - "subrole": null, - "pid": 99999 - } - ], - "subrole": null, - "pid": 99999 - } - ] + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 21, + "targets": [ + "e3|tap|button|C||", + "e4|tap|button|±||", + "e5|tap|button|%||", + "e6|tap|button|÷||", + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e10|tap|button|×||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e14|tap|button|-||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e18|tap|button|+||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e21|tap|button|=||" + ], + "scroll": [], + "udid": "" } } } diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--error-no-simulator.json index d610d58b3..29c92f1c9 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--error-no-simulator.json +++ 
b/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--error-no-simulator.json @@ -1,25 +1,24 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, - "error": "Failed to simulate swipe.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "swipe" + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--error-not-actionable.json b/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--error-not-actionable.json new file mode 100644 index 000000000..d7d3c80b9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'swipeWithin'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "swipe", + "withinElementRef": "e3", + "direction": "up" + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'swipeWithin'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--success.json 
b/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--success.json deleted file mode 100644 index 65fe3640a..000000000 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/swipe--success.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", - "didError": false, - "error": null, - "data": { - "summary": { - "status": "SUCCEEDED" - }, - "action": { - "type": "swipe", - "from": { - "x": 200, - "y": 400 - }, - "to": { - "x": 200, - "y": 200 - } - }, - "artifacts": { - "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." - } - ], - "errors": [] - } - } -} diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/tap--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/tap--error-no-simulator.json index 7c22bc9f3..ba680e80d 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/tap--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/tap--error-no-simulator.json @@ -1,27 +1,23 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, - "error": "Failed to simulate tap at (100, 100).", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { "type": "tap", - "x": 100, - "y": 100 + "elementRef": "e3" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." 
} } } diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/tap--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/tap--success.json index 6792acf87..7b215e9c7 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/tap--success.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/tap--success.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": false, "error": null, "data": { @@ -9,19 +9,10 @@ }, "action": { "type": "tap", - "x": 100, - "y": 400 + "elementRef": "e3" }, "artifacts": { "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." - } - ], - "errors": [] } } } diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/touch--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/touch--error-no-simulator.json index bf72ea709..9b589c7d6 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/touch--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/touch--error-no-simulator.json @@ -1,25 +1,24 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, - "error": "Failed to execute touch event.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "touch" + "type": "touch", + "elementRef": "e3", + "event": "touch down+up" }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + "message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then 
retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/touch--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/touch--success.json index 30f6a0db4..365d4ca63 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/touch--success.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/touch--success.json @@ -1,6 +1,6 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": false, "error": null, "data": { @@ -9,20 +9,11 @@ }, "action": { "type": "touch", - "event": "touch down+up", - "x": 100, - "y": 400 + "elementRef": "e3", + "event": "touch down+up" }, "artifacts": { "simulatorId": "" - }, - "diagnostics": { - "warnings": [ - { - "message": "snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots." - } - ], - "errors": [] } } } diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--error-no-simulator.json b/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--error-no-simulator.json index bf1e185a1..ffb164bb3 100644 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--error-no-simulator.json +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--error-no-simulator.json @@ -1,25 +1,24 @@ { "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", + "schemaVersion": "2", "didError": true, - "error": "Failed to simulate text typing.", + "error": "No runtime UI snapshot is available for this simulator.", "data": { "summary": { "status": "FAILED" }, "action": { - "type": "type-text" + "type": "type-text", + "elementRef": "e3", + "textLength": 5 }, "artifacts": { "simulatorId": "" }, - "diagnostics": { - "warnings": [], - "errors": [ - { - "message": "CLIError(errorDescription: \"Simulator with UDID not found in set.\")" - } - ] + "uiError": { + "code": "SNAPSHOT_MISSING", + 
"message": "No runtime UI snapshot is available for this simulator.", + "recoveryHint": "Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot." } } } diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--error-not-actionable.json b/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--error-not-actionable.json new file mode 100644 index 000000000..e403f06f9 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--error-not-actionable.json @@ -0,0 +1,27 @@ +{ + "schema": "xcodebuildmcp.output.ui-action-result", + "schemaVersion": "2", + "didError": true, + "error": "Element ref 'e3' does not support 'typeText'.", + "data": { + "summary": { + "status": "FAILED" + }, + "action": { + "type": "type-text", + "elementRef": "e3", + "textLength": 5 + }, + "artifacts": { + "simulatorId": "" + }, + "uiError": { + "code": "TARGET_NOT_ACTIONABLE", + "message": "Element ref 'e3' does not support 'typeText'.", + "recoveryHint": "Choose an elementRef that lists the required action, or refresh with snapshot_ui.", + "elementRef": "e3", + "candidates": [], + "snapshotAgeMs": 1234 + } + } +} diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--success.json deleted file mode 100644 index 4e9ddb72d..000000000 --- a/src/snapshot-tests/__fixtures__/json/ui-automation/type-text--success.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "schema": "xcodebuildmcp.output.ui-action-result", - "schemaVersion": "1", - "didError": false, - "error": null, - "data": { - "summary": { - "status": "SUCCEEDED" - }, - "action": { - "type": "type-text" - }, - "artifacts": { - "simulatorId": "" - } - } -} diff --git a/src/snapshot-tests/__fixtures__/json/ui-automation/wait-for-ui--success.json b/src/snapshot-tests/__fixtures__/json/ui-automation/wait-for-ui--success.json new file mode 100644 index 000000000..de6328de9 --- /dev/null 
+++ b/src/snapshot-tests/__fixtures__/json/ui-automation/wait-for-ui--success.json @@ -0,0 +1,44 @@ +{ + "schema": "xcodebuildmcp.output.capture-result", + "schemaVersion": "2", + "didError": false, + "error": null, + "data": { + "summary": { + "status": "SUCCEEDED" + }, + "artifacts": { + "simulatorId": "" + }, + "capture": { + "type": "runtime-snapshot", + "rs": "1", + "screenHash": "", + "seq": 1, + "count": 21, + "targets": [ + "e3|tap|button|C||", + "e4|tap|button|±||", + "e5|tap|button|%||", + "e6|tap|button|÷||", + "e7|tap|button|7||", + "e8|tap|button|8||", + "e9|tap|button|9||", + "e10|tap|button|×||", + "e11|tap|button|4||", + "e12|tap|button|5||", + "e13|tap|button|6||", + "e14|tap|button|-||", + "e15|tap|button|1||", + "e16|tap|button|2||", + "e17|tap|button|3||", + "e18|tap|button|+||", + "e19|tap|button|0||", + "e20|tap|button|.||", + "e21|tap|button|=||" + ], + "scroll": [], + "udid": "" + } + } +} diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/long-press--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/long-press--error-no-simulator.txt index 10acee620..3eb76d15a 100644 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/long-press--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/long-press--error-no-simulator.txt @@ -1,8 +1,8 @@ 👆 Long Press -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate long press at (100, 400). +❌ No runtime UI snapshot is available for this simulator. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/long-press--success.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/long-press--success.txt index faeec8350..46711e63c 100644 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/long-press--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/long-press--success.txt @@ -1,8 +1,4 @@ 👆 Long Press -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Long press at (100, 400) for 500ms simulated successfully. +✅ Long press on elementRef e3 for 500ms simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/snapshot-ui--success.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/snapshot-ui--success.txt index 5766d2720..014b6969e 100644 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/snapshot-ui--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/snapshot-ui--success.txt @@ -1,586 +1,36 @@ 📷 Snapshot UI -Accessibility Hierarchy - ```json - [ - { - "AXFrame" : "{{0, 0}, {402, 874}}", - "AXUniqueId" : null, - "frame" : { - "y" : 0, - "x" : 0, - "width" : 402, - "height" : 874 - }, - "role_description" : "application", - "AXLabel" : "Calculator", - "content_required" : false, - "type" : "Application", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXApplication", - "children" : [ - { - "AXFrame" : "{{344, 250.5}, {34, 67}}", - "AXUniqueId" : null, - "frame" : { - "y" : 250.5, - "x" : 344, - "width" : 34, - "height" : 67 - }, - "role_description" : "text", - "AXLabel" : "0", - "content_required" : false, - "type" : "StaticText", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXStaticText", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 357.5}, {82.7, 81}}", 
- "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "C", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "±", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 357.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "%", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 357.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 357.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "÷", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "7", - "content_required" : false, - "type" : "Button", - "title" 
: null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "8", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 449.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "9", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 449.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 449.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "×", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "4", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 541.5}, 
{82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "5", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 541.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "6", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 541.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 541.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "-", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{19.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 19.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "1", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "2", - "content_required" : false, - "type" : 
"Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 633.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "3", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{300.2, 633.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 633.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "+", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{113.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 113.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "0", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : "{{206.5, 725.5}, {82.7, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 206.5, - "width" : 82.7, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : ".", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - }, - { - "AXFrame" : 
"{{300.2, 725.5}, {82.3, 81}}", - "AXUniqueId" : null, - "frame" : { - "y" : 725.5, - "x" : 300.2, - "width" : 82.3, - "height" : 81 - }, - "role_description" : "button", - "AXLabel" : "=", - "content_required" : false, - "type" : "Button", - "title" : null, - "help" : null, - "custom_actions" : [ - - ], - "AXValue" : null, - "enabled" : true, - "role" : "AXButton", - "children" : [ - - ], - "subrole" : null, - "pid" : - } - ], - "subrole" : null, - "pid" : - } - ] - ``` +Targets (19) — ref|action|role|label|id + e3|tap|button|C| + e4|tap|button|±| + e5|tap|button|%| + e6|tap|button|÷| + e7|tap|button|7| + e8|tap|button|8| + e9|tap|button|9| + e10|tap|button|×| + e11|tap|button|4| + e12|tap|button|5| + e13|tap|button|6| + e14|tap|button|-| + e15|tap|button|1| + e16|tap|button|2| + e17|tap|button|3| + e18|tap|button|+| + e19|tap|button|0| + e20|tap|button|.| + e21|tap|button|=| Tips - - Use frame coordinates for tap/swipe (center: x+width/2, y+height/2) - - If a debugger is attached, ensure the app is running (not stopped on breakpoints) - - Screenshots are for visual verification only + - Use target refs with tap/type_text. + - Use scroll refs with swipe. + - Use wait_for_ui for text/assertions or changing UI. -✅ Accessibility hierarchy retrieved successfully. +✅ Runtime UI snapshot captured with 21 elements, 19 likely targets, and 0 scroll areas. Next steps: 1. Refresh after layout changes: snapshot_ui({ simulatorId: "" }) -2. Tap on element: tap({ simulatorId: "", x: 0, y: 0 }) -3. Take screenshot for verification: screenshot({ simulatorId: "" }) +2. Wait for UI to settle: wait_for_ui({ simulatorId: "SIMULATOR_UUID", predicate: "settled" }) +3. Tap an elementRef: tap({ simulatorId: "", elementRef: "e3" }) +4. 
Take screenshot for verification: screenshot({ simulatorId: "" }) diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--error-no-simulator.txt index 05a6c9606..a0e22ad66 100644 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--error-no-simulator.txt @@ -1,8 +1,8 @@ 👆 Swipe -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate swipe. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--error-not-actionable.txt new file mode 100644 index 000000000..8c257311f --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--error-not-actionable.txt @@ -0,0 +1,9 @@ + +👆 Swipe + +Recovery + Code: TARGET_NOT_ACTIONABLE + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. + +❌ Element ref 'e3' does not support 'swipeWithin'. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--success.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--success.txt deleted file mode 100644 index 9b5ca8373..000000000 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/swipe--success.txt +++ /dev/null @@ -1,8 +0,0 @@ - -👆 Swipe - -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Swipe from (200, 400) to (200, 200) simulated successfully. 
diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/tap--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/tap--error-no-simulator.txt index 3aa5515fa..9f2c04d8b 100644 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/tap--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/tap--error-no-simulator.txt @@ -1,8 +1,8 @@ 👆 Tap -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate tap at (100, 100). +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/tap--success.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/tap--success.txt index 6c3da0d59..3fbb4eeaa 100644 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/tap--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/tap--success.txt @@ -1,8 +1,4 @@ 👆 Tap -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Tap at (100, 400) simulated successfully. +✅ Tap on elementRef e3 simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/touch--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/touch--error-no-simulator.txt index ad4778d4a..71f0ecf56 100644 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/touch--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/touch--error-no-simulator.txt @@ -1,8 +1,8 @@ 👆 Touch -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to execute touch event. 
+❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/touch--success.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/touch--success.txt index b9dad4d4d..9f28f64ad 100644 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/touch--success.txt +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/touch--success.txt @@ -1,8 +1,4 @@ 👆 Touch -Warnings (1): - - ⚠ snapshot_ui has not been called yet. Consider using snapshot_ui for precise coordinates instead of guessing from screenshots. - -✅ Touch event (touch down+up) at (100, 400) executed successfully. +✅ Touch event (touch down+up) on elementRef e3 executed successfully. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--error-no-simulator.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--error-no-simulator.txt index 40a192802..99cf12e61 100644 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--error-no-simulator.txt +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--error-no-simulator.txt @@ -1,8 +1,8 @@ ⌨️ Type Text -Errors (1): +Recovery + Code: SNAPSHOT_MISSING + Hint: Run snapshot_ui for this simulator, then retry with an elementRef from that snapshot. - ✗ CLIError(errorDescription: "Simulator with UDID not found in set.") - -❌ Failed to simulate text typing. +❌ No runtime UI snapshot is available for this simulator. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--error-not-actionable.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--error-not-actionable.txt new file mode 100644 index 000000000..e1e5c9bf8 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--error-not-actionable.txt @@ -0,0 +1,9 @@ + +⌨️ Type Text + +Recovery + Code: TARGET_NOT_ACTIONABLE + Element: e3 + Hint: Choose an elementRef that lists the required action, or refresh with snapshot_ui. 
+ +❌ Element ref 'e3' does not support 'typeText'. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--success.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--success.txt deleted file mode 100644 index a3abffa98..000000000 --- a/src/snapshot-tests/__fixtures__/mcp/ui-automation/type-text--success.txt +++ /dev/null @@ -1,4 +0,0 @@ - -⌨️ Type Text - -✅ Text typing simulated successfully. diff --git a/src/snapshot-tests/__fixtures__/mcp/ui-automation/wait-for-ui--success.txt b/src/snapshot-tests/__fixtures__/mcp/ui-automation/wait-for-ui--success.txt new file mode 100644 index 000000000..86c03d978 --- /dev/null +++ b/src/snapshot-tests/__fixtures__/mcp/ui-automation/wait-for-ui--success.txt @@ -0,0 +1,34 @@ + +⚙️ Wait for UI + +Targets (19) — ref|action|role|label|id + e3|tap|button|C| + e4|tap|button|±| + e5|tap|button|%| + e6|tap|button|÷| + e7|tap|button|7| + e8|tap|button|8| + e9|tap|button|9| + e10|tap|button|×| + e11|tap|button|4| + e12|tap|button|5| + e13|tap|button|6| + e14|tap|button|-| + e15|tap|button|1| + e16|tap|button|2| + e17|tap|button|3| + e18|tap|button|+| + e19|tap|button|0| + e20|tap|button|.| + e21|tap|button|=| + +Tips + - Use target refs with tap/type_text. + - Use scroll refs with swipe. + - Use wait_for_ui for text/assertions or changing UI. + +✅ Wait completed; runtime UI snapshot refreshed with 21 elements, 19 likely targets, and 0 scroll areas. + +Next steps: +1. Refresh runtime snapshot: snapshot_ui({ simulatorId: "SIMULATOR_UUID" }) +2. 
Wait again: wait_for_ui({ simulatorId: "SIMULATOR_UUID", predicate: "settled" }) diff --git a/src/snapshot-tests/__tests__/json-normalize.test.ts b/src/snapshot-tests/__tests__/json-normalize.test.ts index 4c5e9dc55..ce826a230 100644 --- a/src/snapshot-tests/__tests__/json-normalize.test.ts +++ b/src/snapshot-tests/__tests__/json-normalize.test.ts @@ -77,6 +77,64 @@ describe('normalizeStructuredEnvelope', () => { }); }); + it('normalizes volatile runtime snapshot timestamps', () => { + const envelope: StructuredOutputEnvelope = { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 9, + capturedAtMs: 123, + expiresAtMs: 456, + elements: [], + actions: [], + }, + uiError: { + code: 'TARGET_NOT_ACTIONABLE', + message: 'Target is not actionable.', + recoveryHint: 'Refresh the snapshot and choose another element.', + snapshotAgeMs: 42, + }, + }, + }; + + expect(normalizeStructuredEnvelope(envelope)).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: '', + seq: 1, + capturedAtMs: 1_700_000_000_000, + expiresAtMs: 1_700_000_060_000, + elements: [], + actions: [], + }, + uiError: { + code: 'TARGET_NOT_ACTIONABLE', + message: 'Target is not actionable.', + recoveryHint: 'Refresh the snapshot and choose another element.', + snapshotAgeMs: 1234, + }, + }, + }); + }); + it('normalizes volatile build settings PATH entry values without dropping the entry', () => { const envelope: StructuredOutputEnvelope = { schema: 'xcodebuildmcp.output.build-settings', 
diff --git a/src/snapshot-tests/json-normalize.ts b/src/snapshot-tests/json-normalize.ts index bd7694274..71066a750 100644 --- a/src/snapshot-tests/json-normalize.ts +++ b/src/snapshot-tests/json-normalize.ts @@ -13,6 +13,10 @@ function normalizeString(value: string, key?: string, path: string[] = []): stri return ''; } + if (key === 'screenHash') { + return ''; + } + if (key === 'AXFrame') { // Round embedded floats to 1 decimal place for rounding-stable comparison with // the sibling `frame` object. e.g. 82.666664123535156 -> 82.7, 250.5 stays 250.5. @@ -48,6 +52,15 @@ function normalizeNumber(path: string[], key: string | undefined, value: number) return 3600; case 'threadId': return 1; + case 'capturedAtMs': + return 1_700_000_000_000; + case 'expiresAtMs': + return 1_700_000_060_000; + case 'snapshotAgeMs': + return 1234; + case 'seq': + if (path.includes('capture')) return 1; + return value; case 'x': case 'y': case 'width': @@ -111,14 +124,15 @@ function normalizeXcodeBridgeCallEnvelope( return envelope; } - return { + const normalizedEnvelope: StructuredOutputEnvelope = { ...envelope, data: { ...data, content: [], ...(Object.hasOwn(data, 'structuredContent') ? 
{ structuredContent: {} } : {}), }, - } as StructuredOutputEnvelope; + }; + return normalizedEnvelope; } export function normalizeStructuredEnvelope( diff --git a/src/snapshot-tests/suites/ui-automation-suite.ts b/src/snapshot-tests/suites/ui-automation-suite.ts index 536095bf2..ee2ebaa3c 100644 --- a/src/snapshot-tests/suites/ui-automation-suite.ts +++ b/src/snapshot-tests/suites/ui-automation-suite.ts @@ -13,6 +13,25 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe(`${runtime} ui-automation workflow`, () => { let harness: WorkflowSnapshotHarness; let simulatorUdid: string; + let snapshotCaptured = false; + + async function refreshRuntimeSnapshot(): Promise<void> { + if (snapshotCaptured) { + return; + } + + await harness.invoke('simulator', 'launch-app', { + simulatorId: simulatorUdid, + bundleId: BUNDLE_ID, + }); + await new Promise((resolve) => setTimeout(resolve, 1500)); + + const { isError } = await harness.invoke('ui-automation', 'snapshot-ui', { + simulatorId: simulatorUdid, + }); + expect(isError).toBe(false); + snapshotCaptured = true; + } beforeAll(async () => { vi.setConfig({ testTimeout: 120_000 }); @@ -22,7 +41,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi await harness.invoke('simulator', 'build-and-run', { workspacePath: WORKSPACE, scheme: 'CalculatorApp', - simulatorName: 'iPhone 17', + simulatorName: 'iPhone 17 Pro', }); await new Promise((resolve) => setTimeout(resolve, 3000)); @@ -34,10 +53,11 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('tap', () => { it('success', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'tap', { simulatorId: simulatorUdid, - x: 100, - y: 400, + elementRef: 'e3', }); expect(isError).toBe(false); expectFixture(text, 'tap--success'); @@ -46,8 +66,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi 
it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'tap', { simulatorId: INVALID_SIMULATOR_ID, - x: 100, - y: 100, + elementRef: 'e3', }); expect(isError).toBe(true); expectFixture(text, 'tap--error-no-simulator'); @@ -56,10 +75,11 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('touch', () => { it('success', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'touch', { simulatorId: simulatorUdid, - x: 100, - y: 400, + elementRef: 'e3', down: true, up: true, }); @@ -70,8 +90,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'touch', { simulatorId: INVALID_SIMULATOR_ID, - x: 100, - y: 400, + elementRef: 'e3', down: true, up: true, }); @@ -82,10 +101,11 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi describe('long-press', () => { it('success', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'long-press', { simulatorId: simulatorUdid, - x: 100, - y: 400, + elementRef: 'e3', duration: 500, }); expect(isError).toBe(false); @@ -95,8 +115,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'long-press', { simulatorId: INVALID_SIMULATOR_ID, - x: 100, - y: 400, + elementRef: 'e3', duration: 500, }); expect(isError).toBe(true); @@ -105,25 +124,23 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi }); describe('swipe', () => { - it('success', async () => { + it('error - target not actionable', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await 
harness.invoke('ui-automation', 'swipe', { simulatorId: simulatorUdid, - x1: 200, - y1: 400, - x2: 200, - y2: 200, + withinElementRef: 'e3', + direction: 'up', }); - expect(isError).toBe(false); - expectFixture(text, 'swipe--success'); + expect(isError).toBe(true); + expectFixture(text, 'swipe--error-not-actionable'); }); it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'swipe', { simulatorId: INVALID_SIMULATOR_ID, - x1: 200, - y1: 400, - x2: 200, - y2: 200, + withinElementRef: 'e3', + direction: 'up', }); expect(isError).toBe(true); expectFixture(text, 'swipe--error-no-simulator'); @@ -211,18 +228,22 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi }); describe('type-text', () => { - it('success', async () => { + it('error - target not actionable', async () => { + await refreshRuntimeSnapshot(); + const { text, isError } = await harness.invoke('ui-automation', 'type-text', { simulatorId: simulatorUdid, + elementRef: 'e3', text: 'hello', }); - expect(isError).toBe(false); - expectFixture(text, 'type-text--success'); + expect(isError).toBe(true); + expectFixture(text, 'type-text--error-not-actionable'); }); it('error - invalid simulator', async () => { const { text, isError } = await harness.invoke('ui-automation', 'type-text', { simulatorId: INVALID_SIMULATOR_ID, + elementRef: 'e3', text: 'hello', }); expect(isError).toBe(true); @@ -230,6 +251,28 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi }); }); + describe('wait-for-ui', () => { + it('success - existing calculator button', async () => { + await harness.invoke('simulator', 'launch-app', { + simulatorId: simulatorUdid, + bundleId: BUNDLE_ID, + }); + await new Promise((resolve) => setTimeout(resolve, 1500)); + + const { text, isError } = await harness.invoke('ui-automation', 'wait-for-ui', { + simulatorId: simulatorUdid, + predicate: 'exists', + label: 'C', + role: 'button', + 
timeoutMs: 1000, + pollIntervalMs: 100, + }); + expect(isError).toBe(false); + expectFixture(text, 'wait-for-ui--success'); + snapshotCaptured = true; + }); + }); + describe('snapshot-ui', () => { it('success - calculator app', async () => { // Re-focus the calculator app before snapshotting: preceding UI tests @@ -247,6 +290,7 @@ export function registerUiAutomationSnapshotSuite(runtime: SnapshotRuntime): voi expect(isError).toBe(false); expect(text.length).toBeGreaterThan(100); expectFixture(text, 'snapshot-ui--success'); + snapshotCaptured = true; }); it('error - invalid simulator', async () => { diff --git a/src/types/domain-results.ts b/src/types/domain-results.ts index 236b7ca6f..320e114bb 100644 --- a/src/types/domain-results.ts +++ b/src/types/domain-results.ts @@ -50,6 +50,12 @@ export type AtLeastOne<T> = { [K in keyof T]-?: Required<Pick<T, K>> & Partial<Omit<T, K>>; }[keyof T]; import type { BuildInvocationRequest } from './domain-fragments.ts'; +import type { + RuntimeSnapshotUnchangedV1, + RuntimeSnapshotV1, + UiAutomationRecoverableError, + UiWaitMatch, +} from './ui-snapshot.ts'; export type ExecutionStatus = 'SUCCEEDED' | 'FAILED'; export type BuildTarget = 'simulator' | 'device' | 'macos' | 'swift-package'; @@ -232,7 +238,11 @@ export interface CaptureUiHierarchyPayload { type: 'ui-hierarchy'; uiHierarchy: AccessibilityNode[]; } -export type CapturePayload = CaptureImagePayload | CaptureUiHierarchyPayload; +export type CapturePayload = + | CaptureImagePayload + | CaptureUiHierarchyPayload + | RuntimeSnapshotV1 + | RuntimeSnapshotUnchangedV1; export interface DebugFileLineBreakpoint { kind: 'file-line'; file: string; @@ -339,27 +349,22 @@ export interface TestSelectionInfo { } export interface UiActionTap { type: 'tap'; - x?: number; - y?: number; - id?: string; - label?: string; + elementRef: string; } export interface UiActionSwipe { type: 'swipe'; - from?: Point; - to?: Point; + withinElementRef: string; + direction: 'up' | 'down' | 'left' | 'right'; durationSeconds?: 
number; } export interface UiActionTouch { type: 'touch'; + elementRef: string; event?: string; - x?: number; - y?: number; } export interface UiActionLongPress { type: 'long-press'; - x: number; - y: number; + elementRef: string; durationMs: number; } export interface UiActionButton { @@ -372,6 +377,8 @@ export interface UiActionGesture { } export interface UiActionTypeText { type: 'type-text'; + elementRef: string; + textLength?: number; } export interface UiActionKeyPress { type: 'key-press'; @@ -381,6 +388,10 @@ export interface UiActionKeySequence { type: 'key-sequence'; keyCodes: number[]; } +export interface UiActionBatch { + type: 'batch'; + stepCount: number; +} export type UiAction = | UiActionTap | UiActionSwipe @@ -390,7 +401,8 @@ export type UiAction = | UiActionGesture | UiActionTypeText | UiActionKeyPress - | UiActionKeySequence; + | UiActionKeySequence + | UiActionBatch; export interface SimulatorActionBoot { type: 'boot'; } @@ -481,6 +493,8 @@ export type CaptureResultDomainResult = ToolDomainResultBase & { artifacts: { simulatorId: string; screenshotPath?: string }; capture?: CapturePayload; diagnostics?: BasicDiagnostics; + uiError?: UiAutomationRecoverableError; + waitMatch?: UiWaitMatch; }; export type CoverageResultDomainResult = ToolDomainResultBase & { kind: 'coverage-result'; @@ -620,6 +634,7 @@ export type UiActionResultDomainResult = ToolDomainResultBase & { action: UiAction; artifacts: { simulatorId: string }; diagnostics?: BasicDiagnostics; + uiError?: UiAutomationRecoverableError; }; export type XcodeBridgeCallResultDomainResult = ToolDomainResultBase & { kind: 'xcode-bridge-call-result'; diff --git a/src/types/ui-snapshot.ts b/src/types/ui-snapshot.ts new file mode 100644 index 000000000..6f4d09cad --- /dev/null +++ b/src/types/ui-snapshot.ts @@ -0,0 +1,148 @@ +import type { AccessibilityNode, Frame, Point } from './domain-results.ts'; + +export type RuntimeSnapshotProtocol = 'rs/1'; +export type RuntimeSnapshotCaptureType = 
'runtime-snapshot'; + +export type RuntimeActionNameV1 = 'tap' | 'typeText' | 'longPress' | 'touch' | 'swipeWithin'; + +export type RuntimeElementRoleV1 = + | 'application' + | 'button' + | 'cell' + | 'image' + | 'keyboard-key' + | 'list' + | 'menu' + | 'other' + | 'scroll-view' + | 'slider' + | 'switch' + | 'tab' + | 'text' + | 'text-field' + | 'window'; + +export interface RuntimeElementStateV1 { + enabled?: boolean; + focused?: boolean; + selected?: boolean; + visible?: boolean; +} + +export interface RuntimeElementV1 { + ref: string; + role?: RuntimeElementRoleV1; + label?: string; + value?: string; + identifier?: string; + frame: Frame; + state?: RuntimeElementStateV1; + actions: RuntimeActionNameV1[]; +} + +export interface RuntimeActionHintV1 { + action: RuntimeActionNameV1; + elementRef: string; + label?: string; +} + +export interface RuntimeSnapshotV1 { + type: RuntimeSnapshotCaptureType; + protocol: RuntimeSnapshotProtocol; + simulatorId: string; + screenHash: string; + seq: number; + capturedAtMs: number; + expiresAtMs: number; + elements: RuntimeElementV1[]; + actions: RuntimeActionHintV1[]; +} + +export interface RuntimeSnapshotUnchangedV1 { + type: 'runtime-snapshot-unchanged'; + protocol: RuntimeSnapshotProtocol; + simulatorId: string; + screenHash: string; + seq: number; +} + +export interface RuntimeSnapshotMetadata { + path: string; + depth: number; + childCount: number; + signature: string; + activationPoint?: Point; + swipeFrame?: Frame; +} + +export interface RuntimeSnapshotElementRecord { + publicElement: RuntimeElementV1; + metadata: RuntimeSnapshotMetadata; + rawNode: AccessibilityNode; +} + +export interface RuntimeSnapshotRecord { + simulatorId: string; + screenHash: string; + seq: number; + capturedAtMs: number; + expiresAtMs: number; + payload: RuntimeSnapshotV1; + elements: RuntimeSnapshotElementRecord[]; + elementsByRef: Map<string, RuntimeSnapshotElementRecord>; +} + +export type RuntimeSnapshotLookupStatus = 'available' | 'expired' | 'missing'; + +export interface 
RuntimeSnapshotLookup { + status: RuntimeSnapshotLookupStatus; + snapshot: RuntimeSnapshotRecord | null; + snapshotAgeMs?: number; +} + +export type UiAutomationRecoverableErrorCode = + | 'SNAPSHOT_MISSING' + | 'SNAPSHOT_EXPIRED' + | 'SNAPSHOT_PARSE_FAILED' + | 'ELEMENT_REF_NOT_FOUND' + | 'TARGET_NOT_FOUND' + | 'TARGET_AMBIGUOUS' + | 'TARGET_NOT_ACTIONABLE' + | 'WAIT_TIMEOUT' + | 'UI_STATE_CHANGED' + | 'ACTION_FAILED'; + +export interface UiAutomationRecoverableError { + code: UiAutomationRecoverableErrorCode; + message: string; + recoveryHint: string; + elementRef?: string; + candidates?: RuntimeElementV1[]; + snapshotAgeMs?: number; + timeoutMs?: number; +} + +export type UiWaitPredicate = + | 'exists' + | 'gone' + | 'enabled' + | 'focused' + | 'textContains' + | 'settled'; + +export interface UiWaitMatch { + predicate: UiWaitPredicate; + matches: RuntimeElementV1[]; +} + +export type RuntimeElementResolution = + | { + ok: true; + snapshot: RuntimeSnapshotRecord; + element: RuntimeSnapshotElementRecord; + snapshotAgeMs: number; + } + | { + ok: false; + error: UiAutomationRecoverableError; + }; diff --git a/src/utils/__tests__/structured-output-envelope.test.ts b/src/utils/__tests__/structured-output-envelope.test.ts index 0356112c7..c440a6587 100644 --- a/src/utils/__tests__/structured-output-envelope.test.ts +++ b/src/utils/__tests__/structured-output-envelope.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from 'vitest'; import { toStructuredEnvelope } from '../structured-output-envelope.ts'; import type { BuildResultDomainResult, + CaptureResultDomainResult, DeviceListDomainResult, } from '../../types/domain-results.ts'; @@ -49,4 +50,305 @@ describe('toStructuredEnvelope', () => { data: null, }); }); + + it('compacts runtime snapshots inside the capture payload by default', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 
'SIMULATOR-1' }, + waitMatch: { + predicate: 'exists', + matches: [ + { + ref: 'e2', + role: 'button', + label: 'San Francisco', + identifier: 'weather.locationButton', + frame: { x: 12, y: 81, width: 178, height: 33 }, + actions: ['tap', 'longPress', 'touch'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-one', + seq: 1, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'San Francisco', + identifier: 'weather.locationButton', + frame: { x: 12, y: 81, width: 178, height: 33 }, + actions: ['tap', 'longPress', 'touch'], + }, + ], + actions: [ + { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, + { action: 'tap', elementRef: 'e2', label: 'San Francisco' }, + ], + }, + }; + + expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2')).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + rs: '1', + screenHash: 'screen-one', + seq: 1, + count: 2, + targets: ['e2|tap|button|San Francisco||weather.locationButton'], + scroll: ['e1|swipe|application|Weather||'], + udid: 'SIMULATOR-1', + }, + waitMatch: { + predicate: 'exists', + matches: ['e2|tap|button|San Francisco||weather.locationButton'], + }, + }, + }); + }); + + it('compacts unchanged runtime snapshot captures by default', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + 
screenHash: 'screen-one', + seq: 2, + }, + }; + + expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2')).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: false, + error: null, + data: { + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + rs: '1', + screenHash: 'screen-one', + seq: 2, + unchanged: true, + udid: 'SIMULATOR-1', + }, + }, + }); + }); + + it('orders compact runtime snapshot targets by usefulness', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-two', + seq: 2, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e2', + role: 'button', + label: 'Sheet Grabber', + value: 'Expanded', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'button', + label: 'Settings', + frame: { x: 320, y: 40, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e8', + role: 'text-field', + value: 'Portland', + frame: { x: 20, y: 100, width: 200, height: 40 }, + actions: ['typeText'], + }, + { + ref: 'e9', + role: 'button', + label: 'Clear search', + frame: { x: 230, y: 100, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e82', + role: 'button', + label: 'PRECIP., 78%, Next 24 hours', + identifier: 'weather.precipitationCard', + frame: { x: 20, y: 300, width: 340, height: 140 }, + actions: ['tap'], + }, + ], + actions: [], + }, + }; + + const envelope = toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2'); + + expect(envelope.data).toMatchObject({ + capture: { + screenHash: 'screen-two', + seq: 2, + targets: [ + 'e82|tap|button|PRECIP., 78%, Next 24 
hours||weather.precipitationCard', + 'e8|typeText|text-field||Portland|', + 'e3|tap|button|Settings||', + 'e9|tap|button|Clear search||', + ], + }, + }); + }); + + it('compacts runtime snapshot candidates inside recoverable UI errors by default', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + uiError: { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: 'Provide a more specific selector.', + candidates: [ + { + ref: 'e8', + role: 'text-field', + value: 'Lisbon', + identifier: 'weather.locationsSheet', + frame: { x: 65, y: 482, width: 272, height: 18 }, + actions: ['tap', 'typeText', 'longPress', 'touch'], + }, + { + ref: 'e11', + role: 'button', + label: 'Lisbon, Portugal', + value: 'saved', + frame: { x: 40, y: 552, width: 89, height: 49 }, + actions: ['tap', 'longPress', 'touch'], + }, + ], + }, + }; + + expect(toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2')).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + data: { + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + uiError: { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: 'Provide a more specific selector.', + candidates: [ + 'e8|typeText|text-field||Lisbon|weather.locationsSheet', + 'e11|tap|button|Lisbon, Portugal|saved|', + ], + }, + }, + }); + }); + + it('can keep full runtime snapshots and candidates for verbose callers', () => { + const result: CaptureResultDomainResult = { + kind: 'capture-result', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + summary: { status: 'FAILED' 
}, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-three', + seq: 3, + capturedAtMs: 1_000, + expiresAtMs: 61_000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + ], + actions: [{ action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }], + }, + uiError: { + code: 'TARGET_AMBIGUOUS', + message: 'The wait selector matched multiple runtime UI elements.', + recoveryHint: 'Provide a more specific selector.', + candidates: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: ['swipeWithin'], + }, + ], + }, + }; + + expect( + toStructuredEnvelope(result, 'xcodebuildmcp.output.capture-result', '2', { + runtimeSnapshot: 'full', + }), + ).toEqual({ + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + didError: true, + error: 'The wait selector matched multiple runtime UI elements.', + data: { + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: result.capture, + uiError: result.uiError, + }, + }); + }); }); diff --git a/src/utils/renderers/__tests__/cli-text-renderer.test.ts b/src/utils/renderers/__tests__/cli-text-renderer.test.ts index 78da2b98f..b7173dc79 100644 --- a/src/utils/renderers/__tests__/cli-text-renderer.test.ts +++ b/src/utils/renderers/__tests__/cli-text-renderer.test.ts @@ -571,6 +571,498 @@ describe('cli-text-renderer', () => { expect(output).toContain('└ App Path: /tmp/MyApp.app'); }); + it('renders runtime UI snapshots as compact target lists', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { 
simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Add', + identifier: 'add-button', + value: 'selected', + frame: { x: 10, y: 20, width: 30, height: 40 }, + state: { enabled: true, visible: true }, + actions: ['tap', 'longPress'], + }, + { + ref: 'e2', + role: 'text', + label: 'Total', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: [], + }, + ], + actions: [{ action: 'tap', elementRef: 'e1', label: 'Add' }], + }, + }, + }, + }); + + expect(output).toContain('📷 Snapshot UI'); + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Add|selected|add-button'); + expect(output).toContain( + 'Runtime UI snapshot captured with 2 elements, 1 likely target, and 0 scroll areas.', + ); + expect(output).not.toContain('- Use scroll refs with swipe.'); + expect(output).not.toContain('Accessibility Hierarchy'); + expect(output).not.toContain('```json'); + }); + + it('renders unchanged runtime UI snapshots compactly', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot-unchanged', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 2, + }, + }, + }, + }); + + expect(output).toContain('📷 Snapshot UI'); + expect(output).toContain('Runtime UI snapshot unchanged (screenHash: screen-hash, seq: 2).'); + expect(output).not.toContain('Targets ('); + expect(output).not.toContain('Tips'); + }); + + it('orders useful runtime targets before chrome controls in compact output', () => { + const output 
= renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e2', + role: 'button', + label: 'Sheet Grabber', + value: 'Expanded', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: ['tap'], + }, + { + ref: 'e3', + role: 'button', + label: 'Settings', + frame: { x: 320, y: 40, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e8', + role: 'text-field', + value: 'Portland', + frame: { x: 20, y: 100, width: 200, height: 40 }, + actions: ['typeText'], + }, + { + ref: 'e9', + role: 'button', + label: 'Clear search', + frame: { x: 230, y: 100, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e10', + role: 'button', + label: 'Remove', + identifier: 'trash', + frame: { x: 300, y: 180, width: 40, height: 40 }, + actions: ['tap'], + }, + { + ref: 'e82', + role: 'button', + label: 'PRECIP., 78%, Next 24 hours', + identifier: 'weather.precipitationCard', + frame: { x: 20, y: 300, width: 340, height: 140 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e2', label: 'Sheet Grabber' }, + { action: 'tap', elementRef: 'e3', label: 'Settings' }, + { action: 'typeText', elementRef: 'e8' }, + { action: 'tap', elementRef: 'e9', label: 'Clear search' }, + { action: 'tap', elementRef: 'e10', label: 'Remove' }, + { action: 'tap', elementRef: 'e82', label: 'PRECIP., 78%, Next 24 hours' }, + ], + }, + }, + }, + }); + + const precipitationIndex = output.indexOf( + 'e82|tap|button|PRECIP., 78%, Next 24 hours||weather.precipitationCard', + ); + const searchIndex = output.indexOf('e8|typeText|text-field||Portland|'); + const 
settingsIndex = output.indexOf('e3|tap|button|Settings||'); + const clearSearchIndex = output.indexOf('e9|tap|button|Clear search||'); + const removeIndex = output.indexOf('e10|tap|button|Remove||trash'); + + expect(precipitationIndex).toBeGreaterThanOrEqual(0); + expect(searchIndex).toBeGreaterThan(precipitationIndex); + expect(settingsIndex).toBeGreaterThan(searchIndex); + expect(output).not.toContain('e2|tap|button|Sheet Grabber|Expanded|'); + expect(clearSearchIndex).toBeGreaterThan(settingsIndex); + expect(removeIndex).toBeGreaterThan(settingsIndex); + }); + + it('orders unselected segmented controls before already-selected controls in compact output', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e9', + role: 'button', + label: '°F', + value: 'selected', + frame: { x: 20, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + { + ref: 'e10', + role: 'button', + label: '°C', + value: 'not selected', + frame: { x: 100, y: 40, width: 70, height: 44 }, + actions: ['tap'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e9', label: '°F' }, + { action: 'tap', elementRef: 'e10', label: '°C' }, + ], + }, + }, + }, + }); + + const selectedIndex = output.indexOf('e9|tap|button|°F|selected|'); + const unselectedIndex = output.indexOf('e10|tap|button|°C|not selected|'); + + expect(unselectedIndex).toBeGreaterThanOrEqual(0); + expect(selectedIndex).toBeGreaterThan(unselectedIndex); + }); + + it('does not list static text as a likely runtime target when only low-level actions are present', () => { + const output = 
renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Settings', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap', 'longPress', 'touch'], + }, + { + ref: 'e2', + role: 'text', + label: 'Updated just now', + frame: { x: 0, y: 0, width: 100, height: 20 }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'tap', elementRef: 'e1', label: 'Settings' }, + { action: 'longPress', elementRef: 'e2', label: 'Updated just now' }, + { action: 'touch', elementRef: 'e2', label: 'Updated just now' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Settings||'); + expect(output).not.toContain('e2|'); + expect(output).toContain( + 'Runtime UI snapshot captured with 2 elements, 1 likely target, and 0 scroll areas.', + ); + }); + + it('renders runtime UI snapshot scroll areas separately from likely targets', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'application', + label: 'Weather', + frame: { x: 0, y: 0, width: 390, height: 844 }, + actions: 
['swipeWithin'], + }, + { + ref: 'e2', + role: 'button', + label: 'Settings', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap', 'longPress', 'touch'], + }, + ], + actions: [ + { action: 'swipeWithin', elementRef: 'e1', label: 'Weather' }, + { action: 'tap', elementRef: 'e2', label: 'Settings' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Targets (1) — ref|action|role|label|value|id'); + expect(output).toContain('e2|tap|button|Settings||'); + expect(output).toContain('Scroll (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|swipe|application|Weather||'); + expect(output).toContain('- Use scroll refs with swipe.'); + expect(output).toContain( + 'Runtime UI snapshot captured with 2 elements, 1 likely target, and 1 scroll area.', + ); + }); + + it('renders wait_for_ui output with wait-specific text', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + renderHints: { headerTitle: 'Wait for UI' }, + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'exists', + matches: [ + { + ref: 'e1', + role: 'button', + label: 'Continue', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e1', + role: 'button', + label: 'Continue', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap'], + }, + ], + actions: [{ action: 'tap', elementRef: 'e1', label: 'Continue' }], + }, + }, + }, + }); + + expect(output).toContain('⚙️ Wait for UI'); + expect(output).toContain('Matched exists (1) — ref|action|role|label|value|id'); + expect(output).toContain('e1|tap|button|Continue||'); + 
expect(output).toContain( + 'Wait completed; runtime UI snapshot refreshed with 1 element, 1 likely target, and 0 scroll areas.', + ); + }); + + it('renders static wait matches with no primary action', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.capture-result', + schemaVersion: '2', + renderHints: { headerTitle: 'Wait for UI' }, + result: { + kind: 'capture-result', + didError: false, + error: null, + summary: { status: 'SUCCEEDED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + waitMatch: { + predicate: 'textContains', + matches: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + }, + capture: { + type: 'runtime-snapshot', + protocol: 'rs/1', + simulatorId: 'SIMULATOR-1', + screenHash: 'screen-hash', + seq: 1, + capturedAtMs: 1000, + expiresAtMs: 61000, + elements: [ + { + ref: 'e11', + role: 'text', + label: 'No matches', + frame: { x: 20, y: 240, width: 120, height: 24 }, + actions: ['longPress', 'touch'], + }, + ], + actions: [ + { action: 'longPress', elementRef: 'e11', label: 'No matches' }, + { action: 'touch', elementRef: 'e11', label: 'No matches' }, + ], + }, + }, + }, + }); + + expect(output).toContain('Matched textContains (1) — ref|action|role|label|value|id'); + expect(output).toContain('e11|none|text|No matches||'); + expect(output).not.toContain('e11|longPress|text|No matches||'); + }); + + it('renders typed UI action recovery hints', () => { + const output = renderCliTextTranscript({ + structuredOutput: { + schema: 'xcodebuildmcp.output.ui-action-result', + schemaVersion: '2', + result: { + kind: 'ui-action-result', + didError: true, + error: 'Element reference e9 was not found in the current runtime snapshot.', + summary: { status: 'FAILED' }, + artifacts: { simulatorId: 'SIMULATOR-1' }, + action: { type: 'tap', elementRef: 'e9' }, + uiError: { + code: 'ELEMENT_REF_NOT_FOUND', + 
message: 'Element reference e9 was not found in the current runtime snapshot.', + recoveryHint: 'Run snapshot_ui again and retry with a current element reference.', + elementRef: 'e9', + candidates: [ + { + ref: 'e1', + role: 'button', + label: 'Add', + frame: { x: 10, y: 20, width: 30, height: 40 }, + actions: ['tap'], + }, + ], + }, + }, + }, + }); + + expect(output).toContain('Recovery'); + expect(output).toContain('Code: ELEMENT_REF_NOT_FOUND'); + expect(output).toContain('Element: e9'); + expect(output).toContain( + 'Hint: Run snapshot_ui again and retry with a current element reference.', + ); + expect(output).toContain('Candidates (1):'); + expect(output).toContain('e1|tap|button|Add||'); + expect(output).toContain( + '❌ Element reference e9 was not found in the current runtime snapshot.', + ); + }); + it('renders structured output path artifacts as a tree when requested', () => { const output = renderCliTextTranscript({ filePathRenderStyle: 'tree', diff --git a/src/utils/renderers/cli-text-renderer.ts b/src/utils/renderers/cli-text-renderer.ts index 82b932910..ca0c1f850 100644 --- a/src/utils/renderers/cli-text-renderer.ts +++ b/src/utils/renderers/cli-text-renderer.ts @@ -72,6 +72,7 @@ interface CliTextProcessorOptions { showTestTiming: boolean; filePathRenderStyle: FilePathRenderStyle; includeHeaderDetails: boolean; + includeNextSteps: boolean; } interface CliTextRendererOptions { @@ -80,6 +81,7 @@ interface CliTextRendererOptions { showTestTiming?: boolean; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; } export interface CliTextTranscriptInput { @@ -91,6 +93,7 @@ export interface CliTextTranscriptInput { showTestTiming?: boolean; filePathRenderStyle?: FilePathRenderStyle; includeHeaderDetails?: boolean; + includeNextSteps?: boolean; } interface XcodebuildParserState { @@ -109,6 +112,7 @@ function createCliTextProcessor(options: CliTextProcessorOptions): TranscriptRen showTestTiming, 
filePathRenderStyle, includeHeaderDetails, + includeNextSteps, } = options; const groupedCompilerErrors: CompilerErrorRenderItem[] = []; const groupedWarnings: CompilerWarningRenderItem[] = []; @@ -469,7 +473,9 @@ function createCliTextProcessor(options: CliTextProcessorOptions): TranscriptRen groupedCompilerErrors.length = 0; groupedTestFailures.length = 0; groupedWarnings.length = 0; - const nextStepsBlock = createNextStepsBlock(nextSteps, nextStepsRuntime); + const nextStepsBlock = includeNextSteps + ? createNextStepsBlock(nextSteps, nextStepsRuntime) + : null; if (nextStepsBlock && !sawProgressNextSteps) { processItem(nextStepsBlock); } @@ -505,6 +511,7 @@ export function createCliTextRenderer(options: CliTextRendererOptions): Transcri showTestTiming: options.showTestTiming ?? false, filePathRenderStyle: options.filePathRenderStyle ?? 'list', includeHeaderDetails: options.includeHeaderDetails ?? true, + includeNextSteps: options.includeNextSteps ?? true, sink: { clearTransient(): void { reporter.clear(); @@ -530,6 +537,7 @@ export function renderCliTextTranscript(input: CliTextTranscriptInput = {}): str showTestTiming: input.showTestTiming ?? false, filePathRenderStyle: input.filePathRenderStyle ?? 'list', includeHeaderDetails: input.includeHeaderDetails ?? true, + includeNextSteps: input.includeNextSteps ?? 
true, sink: { clearTransient(): void {}, updateTransient(): void {}, diff --git a/src/utils/renderers/domain-result-text.ts b/src/utils/renderers/domain-result-text.ts index 40a5f6186..9df4a4ddb 100644 --- a/src/utils/renderers/domain-result-text.ts +++ b/src/utils/renderers/domain-result-text.ts @@ -5,6 +5,13 @@ import type { TestDiagnostics, ToolDomainResult, } from '../../types/domain-results.ts'; +import type { + RuntimeElementV1, + RuntimeSnapshotUnchangedV1, + RuntimeSnapshotV1, + UiAutomationRecoverableError, + UiWaitMatch, +} from '../../types/ui-snapshot.ts'; import type { RenderHints } from '../../rendering/types.ts'; import type { XcodebuildOperation } from '../../types/domain-fragments.ts'; import type { @@ -160,6 +167,8 @@ type CaptureResultWithVideo = Extract= 24 || /card$/i.test(identifier); +} + +function isAlreadySelectedRuntimeTarget(element: RuntimeElementV1): boolean { + return ( + element.state?.selected === true || normalizedRuntimeSnapshotText(element.value) === 'selected' + ); +} + +function getRuntimeTargetDisplayPriority(element: RuntimeElementV1): number { + if (isLowPriorityRuntimeTarget(element)) { + return 90; + } + if (isAlreadySelectedRuntimeTarget(element)) { + return 70; + } + if (isContentRichTapTarget(element)) { + return 0; + } + if (element.actions.includes('typeText')) { + return 10; + } + if (element.actions.includes('tap')) { + return 20; + } + return 50; +} + +function sortRuntimeTargetsForDisplay(elements: RuntimeElementV1[]): RuntimeElementV1[] { + return elements + .map((element, index) => ({ element, index })) + .sort((left, right) => { + const priorityDelta = + getRuntimeTargetDisplayPriority(left.element) - + getRuntimeTargetDisplayPriority(right.element); + return priorityDelta === 0 ? left.index - right.index : priorityDelta; + }) + .map(({ element }) => element); +} + +function formatRuntimeElementLine(element: RuntimeElementV1, action?: string): string { + const primaryAction = + action ?? 
+ (element.actions.includes('typeText') + ? 'typeText' + : element.actions.includes('tap') + ? 'tap' + : element.actions.includes('swipeWithin') + ? 'swipe' + : 'none'); + return [ + element.ref, + primaryAction, + element.role ?? '', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + +function isLikelyRuntimeTarget(element: RuntimeElementV1): boolean { + return ( + !isHiddenRuntimeTarget(element) && + element.actions.some((action) => action === 'tap' || action === 'typeText') + ); +} + +function isScrollableRuntimeArea(element: RuntimeElementV1): boolean { + return element.actions.includes('swipeWithin') && !isLikelyRuntimeTarget(element); +} + +function countLikelyRuntimeTargets(snapshot: RuntimeSnapshotV1): number { + return snapshot.elements.filter(isLikelyRuntimeTarget).length; +} + +function countScrollableRuntimeAreas(snapshot: RuntimeSnapshotV1): number { + return snapshot.elements.filter(isScrollableRuntimeArea).length; +} + +function createRuntimeSnapshotTargetsSection(snapshot: RuntimeSnapshotV1): SectionTextBlock { + const likelyTargets = sortRuntimeTargetsForDisplay( + snapshot.elements.filter(isLikelyRuntimeTarget), + ); + const lines = likelyTargets.map((element) => formatRuntimeElementLine(element)); + + return createSection( + `Targets (${likelyTargets.length}) — ref|action|role|label|value|id`, + lines.length > 0 ? 
lines : ['(no likely interaction targets found)'], + ); +} + +function createRuntimeSnapshotScrollAreasSection( + snapshot: RuntimeSnapshotV1, +): SectionTextBlock | null { + const scrollAreas = snapshot.elements.filter(isScrollableRuntimeArea); + if (scrollAreas.length === 0) { + return null; + } + + return createSection( + `Scroll (${scrollAreas.length}) — ref|action|role|label|value|id`, + scrollAreas.map((element) => formatRuntimeElementLine(element, 'swipe')), + ); +} + +function createWaitMatchSection(waitMatch: UiWaitMatch): SectionTextBlock { + return createSection( + `Matched ${waitMatch.predicate} (${waitMatch.matches.length}) — ref|action|role|label|value|id`, + waitMatch.matches.length > 0 + ? waitMatch.matches.map((element) => formatRuntimeElementLine(element)) + : ['(no matching elements found)'], + ); +} + +function createUiErrorItems(uiError?: UiAutomationRecoverableError): TextRenderableItem[] { + if (!uiError) { + return []; + } + + const lines = [ + `Code: ${uiError.code}`, + ...(uiError.elementRef ? [`Element: ${uiError.elementRef}`] : []), + ...(typeof uiError.timeoutMs === 'number' ? 
[`Timeout: ${uiError.timeoutMs}ms`] : []), + `Hint: ${uiError.recoveryHint}`, + ]; + + if (uiError.candidates && uiError.candidates.length > 0) { + lines.push( + `Candidates (${uiError.candidates.length}):`, + ...uiError.candidates.map((candidate) => ` ${formatRuntimeElementLine(candidate)}`), + ); + } + + return [createSection('Recovery', lines)]; +} + function createSimulatorActionItems( result: Extract, ): TextRenderableItem[] { @@ -1215,6 +1403,7 @@ function createSimulatorActionItems( function createCaptureResultItems( rawResult: Extract, + hints?: RenderHints, ): TextRenderableItem[] { const result = rawResult as CaptureResultWithVideo; @@ -1254,10 +1443,18 @@ function createCaptureResultItems( return items; } + const capture = result.capture; + const isRuntimeSnapshot = + capture !== undefined && 'type' in capture && capture.type === 'runtime-snapshot'; + const isRuntimeSnapshotUnchanged = + capture !== undefined && 'type' in capture && capture.type === 'runtime-snapshot-unchanged'; const isUiHierarchy = - (result.capture && 'type' in result.capture && result.capture.type === 'ui-hierarchy') || - result.error?.includes('accessibility hierarchy') === true; - const title = isUiHierarchy ? 'Snapshot UI' : 'Screenshot'; + (capture !== undefined && 'type' in capture && capture.type === 'ui-hierarchy') || + isRuntimeSnapshot || + isRuntimeSnapshotUnchanged || + result.error?.includes('accessibility hierarchy') === true || + result.error?.includes('runtime UI snapshot') === true; + const title = hints?.headerTitle ?? (isUiHierarchy ? 'Snapshot UI' : 'Screenshot'); const items: TextRenderableItem[] = [ createHeader(title, [ ...(result.artifacts.simulatorId @@ -1267,10 +1464,61 @@ function createCaptureResultItems( ]; if (result.didError) { + items.push(...createStandardDiagnosticSections(result.diagnostics)); + items.push(...createUiErrorItems(result.uiError)); items.push( - ...createFailureStatusWithDiagnostics( - result, - isUiHierarchy ? 
'Failed to get accessibility hierarchy.' : 'Failed to capture screenshot.', + createStatus( + 'error', + result.error ?? + (isUiHierarchy + ? isRuntimeSnapshot + ? 'Failed to get runtime UI snapshot.' + : 'Failed to get accessibility hierarchy.' + : 'Failed to capture screenshot.'), + ), + ); + return items; + } + + if (isRuntimeSnapshotUnchanged) { + const capture = result.capture as RuntimeSnapshotUnchangedV1; + items.push( + ...createStandardDiagnosticSections(result.diagnostics), + createStatus( + 'success', + `Runtime UI snapshot unchanged (screenHash: ${capture.screenHash}, seq: ${capture.seq}).`, + ), + ); + return items; + } + + if (isRuntimeSnapshot) { + const snapshot = result.capture as RuntimeSnapshotV1; + const likelyTargetCount = countLikelyRuntimeTargets(snapshot); + const scrollAreaCount = countScrollableRuntimeAreas(snapshot); + const scrollAreasSection = createRuntimeSnapshotScrollAreasSection(snapshot); + if (title === 'Wait for UI' && result.waitMatch) { + items.push(createWaitMatchSection(result.waitMatch)); + } + items.push(createRuntimeSnapshotTargetsSection(snapshot)); + if (scrollAreasSection) { + items.push(scrollAreasSection); + } + items.push( + createSection('Tips', [ + '- Use target refs with tap, type_text, long_press, and touch.', + ...(scrollAreaCount > 0 ? ['- Use scroll refs with swipe.'] : []), + '- Refs are snapshot-specific; after snapshot_ui or wait_for_ui, use refs from the latest output.', + '- Use wait_for_ui for text/assertions or changing UI.', + ]), + ); + items.push( + ...createStandardDiagnosticSections(result.diagnostics), + createStatus( + 'success', + title === 'Wait for UI' + ? 
`Wait completed; runtime UI snapshot refreshed with ${pluralize(snapshot.elements.length, 'element')}, ${pluralize(likelyTargetCount, 'likely target')}, and ${pluralize(scrollAreaCount, 'scroll area')}.` + : `Runtime UI snapshot captured with ${pluralize(snapshot.elements.length, 'element')}, ${pluralize(likelyTargetCount, 'likely target')}, and ${pluralize(scrollAreaCount, 'scroll area')}.`, ), ); return items; @@ -2057,7 +2305,7 @@ function createSpecialCaseItems( case 'simulator-action-result': return createSimulatorActionItems(result); case 'capture-result': - return createCaptureResultItems(result); + return createCaptureResultItems(result, hints); case 'process-list': return createProcessListItems(result); case 'coverage-result': @@ -2093,6 +2341,7 @@ function createSpecialCaseItems( 'type-text': 'Type Text', 'key-press': 'Key Press', 'key-sequence': 'Key Sequence', + batch: 'Batch UI Actions', }; const items: TextRenderableItem[] = [ createHeader(headerTitleMap[result.action.type], [ @@ -2100,40 +2349,31 @@ function createSpecialCaseItems( ]), ]; if (result.didError) { - items.push(...createFailureStatusWithDiagnostics(result, 'UI action failed.')); + items.push(...createStandardDiagnosticSections(result.diagnostics)); + items.push(...createUiErrorItems(result.uiError)); + items.push(createStatus('error', result.error ?? 'UI action failed.')); return items; } let successMessage = 'UI action completed successfully.'; switch (result.action.type) { case 'tap': - successMessage = - typeof result.action.x === 'number' && typeof result.action.y === 'number' - ? `Tap at (${result.action.x}, ${result.action.y}) simulated successfully.` - : result.action.id - ? `Tap on element id "${result.action.id}" simulated successfully.` - : result.action.label - ? 
`Tap on element label "${result.action.label}" simulated successfully.` - : successMessage; + successMessage = `Tap on elementRef ${result.action.elementRef} simulated successfully.`; break; case 'swipe': { const durationText = typeof result.action.durationSeconds === 'number' ? ` duration=${result.action.durationSeconds}s` : ''; - if (result.action.from && result.action.to) { - successMessage = - `Swipe from (${result.action.from.x}, ${result.action.from.y}) to (${result.action.to.x}, ${result.action.to.y})` + - `${durationText} simulated successfully.`; - } + successMessage = + `Swipe ${result.action.direction} within elementRef ${result.action.withinElementRef}` + + `${durationText} simulated successfully.`; break; } case 'touch': - if (typeof result.action.x === 'number' && typeof result.action.y === 'number') { - successMessage = `Touch event (${result.action.event ?? 'touch'}) at (${result.action.x}, ${result.action.y}) executed successfully.`; - } + successMessage = `Touch event (${result.action.event}) on elementRef ${result.action.elementRef} executed successfully.`; break; case 'long-press': - successMessage = `Long press at (${result.action.x}, ${result.action.y}) for ${result.action.durationMs}ms simulated successfully.`; + successMessage = `Long press on elementRef ${result.action.elementRef} for ${result.action.durationMs}ms simulated successfully.`; break; case 'button': successMessage = `Hardware button '${result.action.button}' pressed successfully.`; @@ -2141,15 +2381,26 @@ function createSpecialCaseItems( case 'gesture': successMessage = `Gesture '${result.action.gesture}' executed successfully.`; break; - case 'type-text': - successMessage = 'Text typing simulated successfully.'; + case 'type-text': { + const targetText = result.action.elementRef + ? ` into elementRef ${result.action.elementRef}` + : ''; + const lengthText = + typeof result.action.textLength === 'number' + ? 
` (${pluralize(result.action.textLength, 'character')})` + : ''; + successMessage = `Text typed${targetText}${lengthText} successfully.`; break; + } case 'key-press': successMessage = `Key press (code: ${result.action.keyCode}) simulated successfully.`; break; case 'key-sequence': successMessage = `Key sequence [${result.action.keyCodes.join(',')}] executed successfully.`; break; + case 'batch': + successMessage = `Batch UI automation completed successfully (${pluralize(result.action.stepCount, 'step')}).`; + break; } items.push( ...createStandardDiagnosticSections(result.diagnostics), diff --git a/src/utils/structured-output-envelope.ts b/src/utils/structured-output-envelope.ts index 1fec8ee90..ecb008adb 100644 --- a/src/utils/structured-output-envelope.ts +++ b/src/utils/structured-output-envelope.ts @@ -1,23 +1,306 @@ -import type { ToolDomainResult } from '../types/domain-results.js'; -import type { StructuredOutputEnvelope } from '../types/structured-output.js'; +import type { ToolDomainResult } from '../types/domain-results.ts'; +import type { StructuredOutputEnvelope } from '../types/structured-output.ts'; +import type { + RuntimeActionNameV1, + RuntimeElementV1, + RuntimeSnapshotUnchangedV1, + RuntimeSnapshotV1, +} from '../types/ui-snapshot.ts'; type DomainResultData = Omit< TResult, 'kind' | 'didError' | 'error' >; +export type RuntimeSnapshotEnvelopeMode = 'compact' | 'full'; + +export interface StructuredEnvelopeOptions { + runtimeSnapshot?: RuntimeSnapshotEnvelopeMode; +} + +type RuntimeSnapshotCompactCapture = { + type: 'runtime-snapshot'; + rs: '1'; + screenHash: string; + seq: number; + count: number; + targets: string[]; + scroll: string[]; + udid: string; +}; + +type RuntimeSnapshotUnchangedCompactCapture = { + type: 'runtime-snapshot-unchanged'; + rs: '1'; + screenHash: string; + seq: number; + unchanged: true; + udid: string; +}; + +const HIDDEN_RUNTIME_TARGET_LABELS = new Set(['sheet grabber']); + +const LOW_PRIORITY_RUNTIME_TARGET_LABELS = new 
Set([ + 'sheet grabber', + 'close', + 'clear search', + 'remove', + 'delete', + 'clear', + 'c', + 'ac', + '±', + '%', + '÷', + '×', + '-', + '+', + '=', +]); + +function compactRuntimeSnapshotText(value: string | undefined): string { + return (value ?? '').replace(/\s+/g, ' ').replace(/\|/g, '/').trim(); +} + +function normalizedRuntimeSnapshotText(value: string | undefined): string { + return compactRuntimeSnapshotText(value).toLocaleLowerCase(); +} + +function isHiddenRuntimeTarget(element: RuntimeElementV1): boolean { + return HIDDEN_RUNTIME_TARGET_LABELS.has(normalizedRuntimeSnapshotText(element.label)); +} + +function isLowPriorityRuntimeTarget(element: RuntimeElementV1): boolean { + return LOW_PRIORITY_RUNTIME_TARGET_LABELS.has(normalizedRuntimeSnapshotText(element.label)); +} + +function isContentRichTapTarget(element: RuntimeElementV1): boolean { + if (!element.actions.includes('tap')) { + return false; + } + + const label = compactRuntimeSnapshotText(element.label); + const identifier = compactRuntimeSnapshotText(element.identifier); + return label.includes(',') || label.length >= 24 || /card$/i.test(identifier); +} + +function isAlreadySelectedRuntimeTarget(element: RuntimeElementV1): boolean { + return ( + element.state?.selected === true || normalizedRuntimeSnapshotText(element.value) === 'selected' + ); +} + +function getRuntimeTargetDisplayPriority(element: RuntimeElementV1): number { + if (isLowPriorityRuntimeTarget(element)) { + return 90; + } + if (isAlreadySelectedRuntimeTarget(element)) { + return 70; + } + if (isContentRichTapTarget(element)) { + return 0; + } + if (element.actions.includes('typeText')) { + return 10; + } + if (element.actions.includes('tap')) { + return 20; + } + return 50; +} + +function sortRuntimeTargetsForDisplay(elements: RuntimeElementV1[]): RuntimeElementV1[] { + return elements + .map((element, index) => ({ element, index })) + .sort((left, right) => { + const priorityDelta = + 
getRuntimeTargetDisplayPriority(left.element) - + getRuntimeTargetDisplayPriority(right.element); + return priorityDelta === 0 ? left.index - right.index : priorityDelta; + }) + .map(({ element }) => element); +} + +function compactRuntimeElementRow(element: RuntimeElementV1, action: string): string { + return [ + element.ref, + action, + element.role ?? '', + compactRuntimeSnapshotText(element.label), + compactRuntimeSnapshotText(element.value), + compactRuntimeSnapshotText(element.identifier), + ].join('|'); +} + +function primaryRuntimeElementAction(element: RuntimeElementV1): RuntimeActionNameV1 | 'none' { + return ( + (element.actions.includes('typeText') && 'typeText') || + (element.actions.includes('tap') && 'tap') || + (element.actions.includes('swipeWithin') && 'swipeWithin') || + 'none' + ); +} + +function toRuntimeSnapshotCompactCapture( + snapshot: RuntimeSnapshotV1, +): RuntimeSnapshotCompactCapture { + const targets = sortRuntimeTargetsForDisplay( + snapshot.elements.filter( + (element) => + !isHiddenRuntimeTarget(element) && + (element.actions.includes('tap') || element.actions.includes('typeText')), + ), + ).map((element) => { + const action = element.actions.includes('typeText') ? 
'typeText' : 'tap'; + return compactRuntimeElementRow(element, action); + }); + const scroll = snapshot.elements + .filter( + (element) => + element.actions.includes('swipeWithin') && + !element.actions.includes('tap') && + !element.actions.includes('typeText'), + ) + .map((element) => compactRuntimeElementRow(element, 'swipe')); + + return { + type: 'runtime-snapshot', + rs: '1', + screenHash: snapshot.screenHash, + seq: snapshot.seq, + count: snapshot.elements.length, + targets, + scroll, + udid: snapshot.simulatorId, + }; +} + +function compactRuntimeElementCandidate(element: RuntimeElementV1): string { + return compactRuntimeElementRow(element, primaryRuntimeElementAction(element)); +} + +function isRuntimeElement(candidate: unknown): candidate is RuntimeElementV1 { + return ( + typeof candidate === 'object' && + candidate !== null && + 'ref' in candidate && + typeof candidate.ref === 'string' && + 'actions' in candidate && + Array.isArray(candidate.actions) + ); +} + +function isRuntimeSnapshotCapture(capture: unknown): capture is RuntimeSnapshotV1 { + return ( + typeof capture === 'object' && + capture !== null && + 'type' in capture && + capture.type === 'runtime-snapshot' && + 'elements' in capture && + Array.isArray(capture.elements) + ); +} + +function isRuntimeSnapshotUnchangedCapture( + capture: unknown, +): capture is RuntimeSnapshotUnchangedV1 { + return ( + typeof capture === 'object' && + capture !== null && + 'type' in capture && + capture.type === 'runtime-snapshot-unchanged' + ); +} + +function toRuntimeSnapshotUnchangedCompactCapture( + capture: RuntimeSnapshotUnchangedV1, +): RuntimeSnapshotUnchangedCompactCapture { + return { + type: 'runtime-snapshot-unchanged', + rs: '1', + screenHash: capture.screenHash, + seq: capture.seq, + unchanged: true, + udid: capture.simulatorId, + }; +} + +function projectRuntimeSnapshotData( + data: TData, + options: StructuredEnvelopeOptions, +): + | TData + | (Omit & { + capture: RuntimeSnapshotCompactCapture; + 
}) { + if (options.runtimeSnapshot === 'full' || typeof data !== 'object' || data === null) { + return data; + } + + const dataWithCapture = data as TData & { capture?: unknown }; + const projectedData = isRuntimeSnapshotCapture(dataWithCapture.capture) + ? { + ...dataWithCapture, + capture: toRuntimeSnapshotCompactCapture(dataWithCapture.capture), + } + : isRuntimeSnapshotUnchangedCapture(dataWithCapture.capture) + ? { + ...dataWithCapture, + capture: toRuntimeSnapshotUnchangedCompactCapture(dataWithCapture.capture), + } + : dataWithCapture; + + const dataWithRuntimeRows = projectedData as typeof projectedData & { + uiError?: { candidates?: unknown[] }; + waitMatch?: { matches?: unknown[] }; + }; + const uiError = Array.isArray(dataWithRuntimeRows.uiError?.candidates) + ? { + ...dataWithRuntimeRows.uiError, + candidates: dataWithRuntimeRows.uiError.candidates.map((candidate) => + isRuntimeElement(candidate) ? compactRuntimeElementCandidate(candidate) : candidate, + ), + } + : dataWithRuntimeRows.uiError; + const waitMatch = Array.isArray(dataWithRuntimeRows.waitMatch?.matches) + ? { + ...dataWithRuntimeRows.waitMatch, + matches: dataWithRuntimeRows.waitMatch.matches.map((match) => + isRuntimeElement(match) ? compactRuntimeElementCandidate(match) : match, + ), + } + : dataWithRuntimeRows.waitMatch; + + if (uiError === dataWithRuntimeRows.uiError && waitMatch === dataWithRuntimeRows.waitMatch) { + return projectedData; + } + + return { + ...projectedData, + ...(uiError ? { uiError } : {}), + ...(waitMatch ? 
{ waitMatch } : {}), + }; +} + export function toStructuredEnvelope( result: TResult, schema: string, schemaVersion: string, -): StructuredOutputEnvelope> { - const { kind: _kind, didError, error, ...data } = result; + options: StructuredEnvelopeOptions = {}, +): StructuredOutputEnvelope { + const { didError, error } = result; + const data = Object.fromEntries( + Object.entries(result).filter( + ([key]) => key !== 'kind' && key !== 'didError' && key !== 'error', + ), + ) as DomainResultData; + const projectedData = projectRuntimeSnapshotData(data, options); return { schema, schemaVersion, didError, error, - data: Object.keys(data).length === 0 ? null : (data as DomainResultData), + data: Object.keys(projectedData).length === 0 ? null : projectedData, }; }