diff --git a/.gitignore b/.gitignore index e3fec0b..a203ad1 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ /target **/*.rs.bk +**/*.bak Cargo.lock .DS_Store **/.DS_Store @@ -16,7 +17,9 @@ adapters/node/pardus-playwright/dist/ # Tauri frontend crates/pardus-tauri/node_modules/ +crates/pardus-tauri/frontend/node_modules/ crates/pardus-tauri/dist/ +crates/pardus-tauri/frontend/dist/ # Web dashboard web/node_modules/ diff --git a/.opencode/plans/pardus-kg-optimization.md b/.opencode/plans/pardus-kg-optimization.md new file mode 100644 index 0000000..c3f0dc0 --- /dev/null +++ b/.opencode/plans/pardus-kg-optimization.md @@ -0,0 +1,142 @@ +# pardus-kg Optimization Plan + +## Pre-requisite Fix (unrelated pre-existing bug) + +**File:** `crates/pardus-core/src/page.rs:783-791` + +The `snapshot()` method is missing the `redirect_chain` field, causing compilation failure. + +```rust +pub fn snapshot(&self) -> PageSnapshot { + PageSnapshot { + url: self.url.clone(), + status: self.status, + content_type: self.content_type.clone(), + title: self.title(), + html: self.html.html(), + redirect_chain: self.redirect_chain.clone(), + } +} +``` + +--- + +## Phase 1: Bug Fixes (B1-B4) + +### B1. Fix query param sorting in `normalize_url` + +**File:** `crates/pardus-kg/src/crawler.rs:224-234` + +Docstring says "sort query params" but code doesn't sort. Sort query pairs before rebuilding URL string. + +### B2. Fix `Box::leak` memory leak in pagination + +**File:** `crates/pardus-kg/src/discovery.rs:139,146` + +Change `segments` from `Vec<&str>` to `Vec<String>` and use local `String` instead of `Box::leak`. + +### B3. Fix duplicate `navigation_graph()` call + +**File:** `crates/pardus-kg/src/crawler.rs:176-221` + +Pass pre-built `nav_graph` into `discover_transitions_for_page` instead of rebuilding inside. + +### B4. Fix same-origin filtering at frontier insertion + +**File:** `crates/pardus-kg/src/crawler.rs:237-239` + +Replace unused `_root_origin` param with actual same-origin check. 
Skip cross-origin URLs before enqueueing. + +--- + +## Phase 2: Quick Wins (H4, M1) + +### H4. Incremental blake3 hashing + +**File:** `crates/pardus-kg/src/fingerprint.rs` + +Replace 3 functions (`hash_tree_structure`, `hash_resource_set`, `compute_view_state_id`) to use `blake3::Hasher::new()` + incremental `update()` calls instead of building large intermediate strings. + +### M1. Remove duplicate `role_str` function + +**File:** `crates/pardus-kg/src/fingerprint.rs:88-122` + +Delete the local `role_str()` that allocates `String` per call. Use `node.role.role_str()` which already exists on `SemanticRole` in pardus-core and returns `&str`. + +--- + +## Phase 3: Parallel Fetch (H1) + +**File:** `crates/pardus-kg/src/crawler.rs` + +- Add `concurrency: usize` field to `CrawlConfig` (default: 4) +- Add `tokio/sync` and `tokio/rt` features to `Cargo.toml` +- Replace serial BFS loop with batched parallel fetch using `tokio::task::JoinSet` + `tokio::sync::Semaphore` +- Result processing stays serial to maintain BFS ordering and safe `HashMap` mutation +- Parallelism is I/O-bound fetch only; semantic tree building stays in collection loop + +--- + +## Phase 4: Single-Pass HTML (H3) + +### New unified analysis API + +**New file:** `crates/pardus-core/src/page_analysis.rs` + +Create `PageAnalysis` struct with `build(html, page_url)` that produces both `SemanticTree` and `NavigationGraph` through a single API call. Initially delegates to individual builders; evolved later into true single-pass. + +--- + +## Phase 5: Memory Optimization (M2-M4, L1, L3) + +### M2. Optional tree storage + +- Add `store_full_trees: bool` to `CrawlConfig` +- Make `semantic_tree` and `navigation_graph` `Option` on `ViewState` +- Skip serializing when `None` + +### M3. Type-safe HashMap keys + +**File:** `crates/pardus-kg/src/graph.rs` + +Change `states: HashMap` to `HashMap`. Update `add_state` and `has_state` accordingly. Update all callers. + +### M4. 
HashSet for resources + +Change `resource_urls: BTreeSet` to `HashSet` across state.rs and fingerprint.rs. Sort only when hashing. + +### L1. Remove dead `verify_transitions` config + +Remove the unused field from `CrawlConfig`. + +### L3. Crawler-level retry + +Add `retries: u8` to `FrontierEntry`. On fetch failure, re-enqueue up to 2 retries. + +--- + +## File Change Summary + +| Order | File | Changes | +|-------|------|---------| +| 0 | `pardus-core/src/page.rs` | Fix missing `redirect_chain` in `snapshot()` | +| 1 | `pardus-kg/src/discovery.rs` | B2: Fix `Box::leak` | +| 1 | `pardus-kg/src/crawler.rs` | B1: query param sort; B3: pass nav_graph; B4: same-origin filter | +| 2 | `pardus-kg/src/fingerprint.rs` | H4: incremental blake3; M1: remove role_str; M4: HashSet | +| 3 | `pardus-kg/src/crawler.rs` | H1: parallel fetch | +| 3 | `pardus-kg/src/config.rs` | Add `concurrency` | +| 3 | `pardus-kg/Cargo.toml` | Add tokio features | +| 4 | `pardus-core/src/page_analysis.rs` | New file: unified PageAnalysis | +| 4 | `pardus-kg/src/crawler.rs` | Use PageAnalysis | +| 5 | `pardus-kg/src/graph.rs` | M3: HashMap key type | +| 5 | `pardus-kg/src/state.rs` | M2: optional trees; M4: HashSet | +| 5 | `pardus-kg/src/config.rs` | L1: remove dead field; add store_full_trees | + +## Verification + +```bash +cargo test -p pardus-kg +cargo test -p pardus-core +cargo clippy -p pardus-kg -- -D warnings +cargo build -p pardus-kg +``` diff --git a/Cargo.toml b/Cargo.toml index 0694343..1699549 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -30,7 +30,7 @@ futures-util = "0.3" blake3 = "1" lol_html = "2" reqwest = { version = "0.12", features = ["cookies", "gzip", "brotli", "deflate", "json"] } -rquest = { version = "5", features = ["cookies", "gzip", "brotli", "deflate", "json", "stream", "socks", "multipart"] } +rquest = { version = "5", features = ["cookies", "gzip", "brotli", "deflate", "json", "stream", "socks", "multipart", "webpki-roots"] } rquest-util = "2" parking_lot = "0.12" 
base64 = "0.22" diff --git a/ai-agent/pardus-browser/package-lock.json b/ai-agent/pardus-browser/package-lock.json deleted file mode 100644 index 71bbadf..0000000 --- a/ai-agent/pardus-browser/package-lock.json +++ /dev/null @@ -1,1063 +0,0 @@ -{ - "name": "pardus-browser-agent", - "version": "0.1.0", - "lockfileVersion": 3, - "requires": true, - "packages": { - "": { - "name": "pardus-browser-agent", - "version": "0.1.0", - "dependencies": { - "openai": "^4.28.0", - "ws": "^8.16.0" - }, - "devDependencies": { - "@types/node": "^20.11.0", - "@types/ws": "^8.5.10", - "tsx": "^4.7.0", - "typescript": "^5.3.0" - }, - "engines": { - "node": ">=18.0.0" - } - }, - "node_modules/@esbuild/aix-ppc64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.27.7.tgz", - "integrity": "sha512-EKX3Qwmhz1eMdEJokhALr0YiD0lhQNwDqkPYyPhiSwKrh7/4KRjQc04sZ8db+5DVVnZ1LmbNDI1uAMPEUBnQPg==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "aix" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.27.7.tgz", - "integrity": "sha512-jbPXvB4Yj2yBV7HUfE2KHe4GJX51QplCN1pGbYjvsyCZbQmies29EoJbkEc+vYuU5o45AfQn37vZlyXy4YJ8RQ==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.27.7.tgz", - "integrity": "sha512-62dPZHpIXzvChfvfLJow3q5dDtiNMkwiRzPylSCfriLvZeq0a1bWChrGx/BbUbPwOrsWKMn8idSllklzBy+dgQ==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-x64": { - "version": "0.27.7", - "resolved": 
"https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.27.7.tgz", - "integrity": "sha512-x5VpMODneVDb70PYV2VQOmIUUiBtY3D3mPBG8NxVk5CogneYhkR7MmM3yR/uMdITLrC1ml/NV1rj4bMJuy9MCg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-arm64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.27.7.tgz", - "integrity": "sha512-5lckdqeuBPlKUwvoCXIgI2D9/ABmPq3Rdp7IfL70393YgaASt7tbju3Ac+ePVi3KDH6N2RqePfHnXkaDtY9fkw==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-x64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.27.7.tgz", - "integrity": "sha512-rYnXrKcXuT7Z+WL5K980jVFdvVKhCHhUwid+dDYQpH+qu+TefcomiMAJpIiC2EM3Rjtq0sO3StMV/+3w3MyyqQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-arm64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.27.7.tgz", - "integrity": "sha512-B48PqeCsEgOtzME2GbNM2roU29AMTuOIN91dsMO30t+Ydis3z/3Ngoj5hhnsOSSwNzS+6JppqWsuhTp6E82l2w==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-x64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.27.7.tgz", - "integrity": "sha512-jOBDK5XEjA4m5IJK3bpAQF9/Lelu/Z9ZcdhTRLf4cajlB+8VEhFFRjWgfy3M1O4rO2GQ/b2dLwCUGpiF/eATNQ==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } 
- }, - "node_modules/@esbuild/linux-arm": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.27.7.tgz", - "integrity": "sha512-RkT/YXYBTSULo3+af8Ib0ykH8u2MBh57o7q/DAs3lTJlyVQkgQvlrPTnjIzzRPQyavxtPtfg0EopvDyIt0j1rA==", - "cpu": [ - "arm" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.27.7.tgz", - "integrity": "sha512-RZPHBoxXuNnPQO9rvjh5jdkRmVizktkT7TCDkDmQ0W2SwHInKCAV95GRuvdSvA7w4VMwfCjUiPwDi0ZO6Nfe9A==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ia32": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.27.7.tgz", - "integrity": "sha512-GA48aKNkyQDbd3KtkplYWT102C5sn/EZTY4XROkxONgruHPU72l+gW+FfF8tf2cFjeHaRbWpOYa/uRBz/Xq1Pg==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-loong64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.27.7.tgz", - "integrity": "sha512-a4POruNM2oWsD4WKvBSEKGIiWQF8fZOAsycHOt6JBpZ+JN2n2JH9WAv56SOyu9X5IqAjqSIPTaJkqN8F7XOQ5Q==", - "cpu": [ - "loong64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-mips64el": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.27.7.tgz", - "integrity": "sha512-KabT5I6StirGfIz0FMgl1I+R1H73Gp0ofL9A3nG3i/cYFJzKHhouBV5VWK1CSgKvVaG4q1RNpCTR2LuTVB3fIw==", - "cpu": [ - "mips64el" - ], - "dev": true, - "license": 
"MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ppc64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.27.7.tgz", - "integrity": "sha512-gRsL4x6wsGHGRqhtI+ifpN/vpOFTQtnbsupUF5R5YTAg+y/lKelYR1hXbnBdzDjGbMYjVJLJTd2OFmMewAgwlQ==", - "cpu": [ - "ppc64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-riscv64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.27.7.tgz", - "integrity": "sha512-hL25LbxO1QOngGzu2U5xeXtxXcW+/GvMN3ejANqXkxZ/opySAZMrc+9LY/WyjAan41unrR3YrmtTsUpwT66InQ==", - "cpu": [ - "riscv64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-s390x": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.27.7.tgz", - "integrity": "sha512-2k8go8Ycu1Kb46vEelhu1vqEP+UeRVj2zY1pSuPdgvbd5ykAw82Lrro28vXUrRmzEsUV0NzCf54yARIK8r0fdw==", - "cpu": [ - "s390x" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-x64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.27.7.tgz", - "integrity": "sha512-hzznmADPt+OmsYzw1EE33ccA+HPdIqiCRq7cQeL1Jlq2gb1+OyWBkMCrYGBJ+sxVzve2ZJEVeePbLM2iEIZSxA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-arm64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.27.7.tgz", - "integrity": 
"sha512-b6pqtrQdigZBwZxAn1UpazEisvwaIDvdbMbmrly7cDTMFnw/+3lVxxCTGOrkPVnsYIosJJXAsILG9XcQS+Yu6w==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-x64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.27.7.tgz", - "integrity": "sha512-OfatkLojr6U+WN5EDYuoQhtM+1xco+/6FSzJJnuWiUw5eVcicbyK3dq5EeV/QHT1uy6GoDhGbFpprUiHUYggrw==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-arm64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.27.7.tgz", - "integrity": "sha512-AFuojMQTxAz75Fo8idVcqoQWEHIXFRbOc1TrVcFSgCZtQfSdc1RXgB3tjOn/krRHENUB4j00bfGjyl2mJrU37A==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-x64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.27.7.tgz", - "integrity": "sha512-+A1NJmfM8WNDv5CLVQYJ5PshuRm/4cI6WMZRg1by1GwPIQPCTs1GLEUHwiiQGT5zDdyLiRM/l1G0Pv54gvtKIg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openharmony-arm64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/openharmony-arm64/-/openharmony-arm64-0.27.7.tgz", - "integrity": "sha512-+KrvYb/C8zA9CU/g0sR6w2RBw7IGc5J2BPnc3dYc5VJxHCSF1yNMxTV5LQ7GuKteQXZtspjFbiuW5/dOj7H4Yw==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "openharmony" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/sunos-x64": { - "version": "0.27.7", - 
"resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.27.7.tgz", - "integrity": "sha512-ikktIhFBzQNt/QDyOL580ti9+5mL/YZeUPKU2ivGtGjdTYoqz6jObj6nOMfhASpS4GU4Q/Clh1QtxWAvcYKamA==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "sunos" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-arm64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.27.7.tgz", - "integrity": "sha512-7yRhbHvPqSpRUV7Q20VuDwbjW5kIMwTHpptuUzV+AA46kiPze5Z7qgt6CLCK3pWFrHeNfDd1VKgyP4O+ng17CA==", - "cpu": [ - "arm64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-ia32": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.27.7.tgz", - "integrity": "sha512-SmwKXe6VHIyZYbBLJrhOoCJRB/Z1tckzmgTLfFYOfpMAx63BJEaL9ExI8x7v0oAO3Zh6D/Oi1gVxEYr5oUCFhw==", - "cpu": [ - "ia32" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-x64": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.27.7.tgz", - "integrity": "sha512-56hiAJPhwQ1R4i+21FVF7V8kSD5zZTdHcVuRFMW0hn753vVfQN8xlx4uOPT4xoGH0Z/oVATuR82AiqSTDIpaHg==", - "cpu": [ - "x64" - ], - "dev": true, - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@types/node": { - "version": "20.19.37", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.19.37.tgz", - "integrity": "sha512-8kzdPJ3FsNsVIurqBs7oodNnCEVbni9yUEkaHbgptDACOPW04jimGagZ51E6+lXUwJjgnBw+hyko/lkFWCldqw==", - "license": "MIT", - "dependencies": { - "undici-types": "~6.21.0" - } - }, - "node_modules/@types/node-fetch": { - "version": "2.6.13", - "resolved": 
"https://registry.npmjs.org/@types/node-fetch/-/node-fetch-2.6.13.tgz", - "integrity": "sha512-QGpRVpzSaUs30JBSGPjOg4Uveu384erbHBoT1zeONvyCfwQxIkUshLAOqN/k9EjGviPRmWTTe6aH2qySWKTVSw==", - "license": "MIT", - "dependencies": { - "@types/node": "*", - "form-data": "^4.0.4" - } - }, - "node_modules/@types/ws": { - "version": "8.18.1", - "resolved": "https://registry.npmjs.org/@types/ws/-/ws-8.18.1.tgz", - "integrity": "sha512-ThVF6DCVhA8kUGy+aazFQ4kXQ7E1Ty7A3ypFOe0IcJV8O/M511G99AW24irKrW56Wt44yG9+ij8FaqoBGkuBXg==", - "dev": true, - "license": "MIT", - "dependencies": { - "@types/node": "*" - } - }, - "node_modules/abort-controller": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz", - "integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==", - "license": "MIT", - "dependencies": { - "event-target-shim": "^5.0.0" - }, - "engines": { - "node": ">=6.5" - } - }, - "node_modules/agentkeepalive": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/agentkeepalive/-/agentkeepalive-4.6.0.tgz", - "integrity": "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==", - "license": "MIT", - "dependencies": { - "humanize-ms": "^1.2.1" - }, - "engines": { - "node": ">= 8.0.0" - } - }, - "node_modules/asynckit": { - "version": "0.4.0", - "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", - "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", - "license": "MIT" - }, - "node_modules/call-bind-apply-helpers": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", - "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "function-bind": "^1.1.2" - 
}, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/combined-stream": { - "version": "1.0.8", - "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", - "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", - "license": "MIT", - "dependencies": { - "delayed-stream": "~1.0.0" - }, - "engines": { - "node": ">= 0.8" - } - }, - "node_modules/delayed-stream": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", - "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", - "license": "MIT", - "engines": { - "node": ">=0.4.0" - } - }, - "node_modules/dunder-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", - "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "license": "MIT", - "dependencies": { - "call-bind-apply-helpers": "^1.0.1", - "es-errors": "^1.3.0", - "gopd": "^1.2.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-define-property": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", - "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-errors": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", - "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-object-atoms": { - "version": "1.1.1", - "resolved": "https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", - "integrity": 
"sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/es-set-tostringtag": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/es-set-tostringtag/-/es-set-tostringtag-2.1.0.tgz", - "integrity": "sha512-j6vWzfrGVfyXxge+O0x5sh6cvxAog0a/4Rdd2K36zCMV5eJ+/+tOAngRO8cODMNWbVRdVlmGZQL2YS3yR8bIUA==", - "license": "MIT", - "dependencies": { - "es-errors": "^1.3.0", - "get-intrinsic": "^1.2.6", - "has-tostringtag": "^1.0.2", - "hasown": "^2.0.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/esbuild": { - "version": "0.27.7", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.27.7.tgz", - "integrity": "sha512-IxpibTjyVnmrIQo5aqNpCgoACA/dTKLTlhMHihVHhdkxKyPO1uBBthumT0rdHmcsk9uMonIWS0m4FljWzILh3w==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.27.7", - "@esbuild/android-arm": "0.27.7", - "@esbuild/android-arm64": "0.27.7", - "@esbuild/android-x64": "0.27.7", - "@esbuild/darwin-arm64": "0.27.7", - "@esbuild/darwin-x64": "0.27.7", - "@esbuild/freebsd-arm64": "0.27.7", - "@esbuild/freebsd-x64": "0.27.7", - "@esbuild/linux-arm": "0.27.7", - "@esbuild/linux-arm64": "0.27.7", - "@esbuild/linux-ia32": "0.27.7", - "@esbuild/linux-loong64": "0.27.7", - "@esbuild/linux-mips64el": "0.27.7", - "@esbuild/linux-ppc64": "0.27.7", - "@esbuild/linux-riscv64": "0.27.7", - "@esbuild/linux-s390x": "0.27.7", - "@esbuild/linux-x64": "0.27.7", - "@esbuild/netbsd-arm64": "0.27.7", - "@esbuild/netbsd-x64": "0.27.7", - "@esbuild/openbsd-arm64": "0.27.7", - "@esbuild/openbsd-x64": "0.27.7", - "@esbuild/openharmony-arm64": "0.27.7", - "@esbuild/sunos-x64": "0.27.7", - "@esbuild/win32-arm64": "0.27.7", - "@esbuild/win32-ia32": "0.27.7", - 
"@esbuild/win32-x64": "0.27.7" - } - }, - "node_modules/event-target-shim": { - "version": "5.0.1", - "resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz", - "integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ==", - "license": "MIT", - "engines": { - "node": ">=6" - } - }, - "node_modules/form-data": { - "version": "4.0.5", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.5.tgz", - "integrity": "sha512-8RipRLol37bNs2bhoV67fiTEvdTrbMUYcFTiy3+wuuOnUog2QBHCZWXDRijWQfAkhBj2Uf5UnVaiWwA5vdd82w==", - "license": "MIT", - "dependencies": { - "asynckit": "^0.4.0", - "combined-stream": "^1.0.8", - "es-set-tostringtag": "^2.1.0", - "hasown": "^2.0.2", - "mime-types": "^2.1.12" - }, - "engines": { - "node": ">= 6" - } - }, - "node_modules/form-data-encoder": { - "version": "1.7.2", - "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", - "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==", - "license": "MIT" - }, - "node_modules/formdata-node": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", - "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", - "license": "MIT", - "dependencies": { - "node-domexception": "1.0.0", - "web-streams-polyfill": "4.0.0-beta.3" - }, - "engines": { - "node": ">= 12.20" - } - }, - "node_modules/fsevents": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", - "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", - "dev": true, - "hasInstallScript": true, - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": "^8.16.0 || ^10.6.0 || >=11.0.0" - } - }, - "node_modules/function-bind": { - "version": 
"1.1.2", - "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", - "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "license": "MIT", - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/get-intrinsic": { - "version": "1.3.0", - "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", - "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "license": "MIT", - "dependencies": { - "call-bind-apply-helpers": "^1.0.2", - "es-define-property": "^1.0.1", - "es-errors": "^1.3.0", - "es-object-atoms": "^1.1.1", - "function-bind": "^1.1.2", - "get-proto": "^1.0.1", - "gopd": "^1.2.0", - "has-symbols": "^1.1.0", - "hasown": "^2.0.2", - "math-intrinsics": "^1.1.0" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/get-proto": { - "version": "1.0.1", - "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", - "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "license": "MIT", - "dependencies": { - "dunder-proto": "^1.0.1", - "es-object-atoms": "^1.0.0" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/get-tsconfig": { - "version": "4.13.7", - "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.13.7.tgz", - "integrity": "sha512-7tN6rFgBlMgpBML5j8typ92BKFi2sFQvIdpAqLA2beia5avZDrMs0FLZiM5etShWq5irVyGcGMEA1jcDaK7A/Q==", - "dev": true, - "license": "MIT", - "dependencies": { - "resolve-pkg-maps": "^1.0.0" - }, - "funding": { - "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" - } - }, - "node_modules/gopd": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", - "integrity": 
"sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-symbols": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", - "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "license": "MIT", - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/has-tostringtag": { - "version": "1.0.2", - "resolved": "https://registry.npmjs.org/has-tostringtag/-/has-tostringtag-1.0.2.tgz", - "integrity": "sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==", - "license": "MIT", - "dependencies": { - "has-symbols": "^1.0.3" - }, - "engines": { - "node": ">= 0.4" - }, - "funding": { - "url": "https://github.com/sponsors/ljharb" - } - }, - "node_modules/hasown": { - "version": "2.0.2", - "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", - "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "license": "MIT", - "dependencies": { - "function-bind": "^1.1.2" - }, - "engines": { - "node": ">= 0.4" - } - }, - "node_modules/humanize-ms": { - "version": "1.2.1", - "resolved": "https://registry.npmjs.org/humanize-ms/-/humanize-ms-1.2.1.tgz", - "integrity": "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==", - "license": "MIT", - "dependencies": { - "ms": "^2.0.0" - } - }, - "node_modules/math-intrinsics": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", - "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", - "license": "MIT", - 
"engines": { - "node": ">= 0.4" - } - }, - "node_modules/mime-db": { - "version": "1.52.0", - "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", - "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", - "license": "MIT", - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/mime-types": { - "version": "2.1.35", - "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", - "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", - "license": "MIT", - "dependencies": { - "mime-db": "1.52.0" - }, - "engines": { - "node": ">= 0.6" - } - }, - "node_modules/ms": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", - "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", - "license": "MIT" - }, - "node_modules/node-domexception": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/node-domexception/-/node-domexception-1.0.0.tgz", - "integrity": "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==", - "deprecated": "Use your platform's native DOMException instead", - "funding": [ - { - "type": "github", - "url": "https://github.com/sponsors/jimmywarting" - }, - { - "type": "github", - "url": "https://paypal.me/jimmywarting" - } - ], - "license": "MIT", - "engines": { - "node": ">=10.5.0" - } - }, - "node_modules/node-fetch": { - "version": "2.7.0", - "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz", - "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==", - "license": "MIT", - "dependencies": { - "whatwg-url": "^5.0.0" - }, - "engines": { - "node": "4.x || >=6.0.0" - }, - "peerDependencies": { - "encoding": "^0.1.0" - }, - "peerDependenciesMeta": { - "encoding": { - "optional": true - } - } - 
}, - "node_modules/openai": { - "version": "4.104.0", - "resolved": "https://registry.npmjs.org/openai/-/openai-4.104.0.tgz", - "integrity": "sha512-p99EFNsA/yX6UhVO93f5kJsDRLAg+CTA2RBqdHK4RtK8u5IJw32Hyb2dTGKbnnFmnuoBv5r7Z2CURI9sGZpSuA==", - "license": "Apache-2.0", - "dependencies": { - "@types/node": "^18.11.18", - "@types/node-fetch": "^2.6.4", - "abort-controller": "^3.0.0", - "agentkeepalive": "^4.2.1", - "form-data-encoder": "1.7.2", - "formdata-node": "^4.3.2", - "node-fetch": "^2.6.7" - }, - "bin": { - "openai": "bin/cli" - }, - "peerDependencies": { - "ws": "^8.18.0", - "zod": "^3.23.8" - }, - "peerDependenciesMeta": { - "ws": { - "optional": true - }, - "zod": { - "optional": true - } - } - }, - "node_modules/openai/node_modules/@types/node": { - "version": "18.19.130", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.19.130.tgz", - "integrity": "sha512-GRaXQx6jGfL8sKfaIDD6OupbIHBr9jv7Jnaml9tB7l4v068PAOXqfcujMMo5PhbIs6ggR1XODELqahT2R8v0fg==", - "license": "MIT", - "dependencies": { - "undici-types": "~5.26.4" - } - }, - "node_modules/openai/node_modules/undici-types": { - "version": "5.26.5", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "license": "MIT" - }, - "node_modules/resolve-pkg-maps": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", - "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", - "dev": true, - "license": "MIT", - "funding": { - "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" - } - }, - "node_modules/tr46": { - "version": "0.0.3", - "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", - "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==", - "license": "MIT" - }, 
- "node_modules/tsx": { - "version": "4.21.0", - "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.21.0.tgz", - "integrity": "sha512-5C1sg4USs1lfG0GFb2RLXsdpXqBSEhAaA/0kPL01wxzpMqLILNxIxIOKiILz+cdg/pLnOUxFYOR5yhHU666wbw==", - "dev": true, - "license": "MIT", - "dependencies": { - "esbuild": "~0.27.0", - "get-tsconfig": "^4.7.5" - }, - "bin": { - "tsx": "dist/cli.mjs" - }, - "engines": { - "node": ">=18.0.0" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - } - }, - "node_modules/typescript": { - "version": "5.9.3", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.9.3.tgz", - "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==", - "dev": true, - "license": "Apache-2.0", - "bin": { - "tsc": "bin/tsc", - "tsserver": "bin/tsserver" - }, - "engines": { - "node": ">=14.17" - } - }, - "node_modules/undici-types": { - "version": "6.21.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", - "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", - "license": "MIT" - }, - "node_modules/web-streams-polyfill": { - "version": "4.0.0-beta.3", - "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", - "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", - "license": "MIT", - "engines": { - "node": ">= 14" - } - }, - "node_modules/webidl-conversions": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", - "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==", - "license": "BSD-2-Clause" - }, - "node_modules/whatwg-url": { - "version": "5.0.0", - "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", - "integrity": 
"sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", - "license": "MIT", - "dependencies": { - "tr46": "~0.0.3", - "webidl-conversions": "^3.0.0" - } - }, - "node_modules/ws": { - "version": "8.20.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.20.0.tgz", - "integrity": "sha512-sAt8BhgNbzCtgGbt2OxmpuryO63ZoDk/sqaB/znQm94T4fCEsy/yV+7CdC1kJhOU9lboAEU7R3kquuycDoibVA==", - "license": "MIT", - "peer": true, - "engines": { - "node": ">=10.0.0" - }, - "peerDependencies": { - "bufferutil": "^4.0.1", - "utf-8-validate": ">=5.0.2" - }, - "peerDependenciesMeta": { - "bufferutil": { - "optional": true - }, - "utf-8-validate": { - "optional": true - } - } - } - } -} diff --git a/ai-agent/pardus-browser/src/__tests__/agent/Agent.test.ts b/ai-agent/pardus-browser/src/__tests__/agent/Agent.test.ts index c04d852..79b7acb 100644 --- a/ai-agent/pardus-browser/src/__tests__/agent/Agent.test.ts +++ b/ai-agent/pardus-browser/src/__tests__/agent/Agent.test.ts @@ -59,7 +59,7 @@ describe('Agent', () => { it('should have default tool configuration', () => { const config = agent.getToolConfig(); - assert.strictEqual(config?.parallel, false); + assert.strictEqual(config?.parallel, true); assert.strictEqual(config?.continueOnError, true); assert.strictEqual(config?.defaultRetryConfig, undefined); }); diff --git a/ai-agent/pardus-browser/src/__tests__/llm/prompts.test.ts b/ai-agent/pardus-browser/src/__tests__/llm/prompts.test.ts index 3df692a..734e247 100644 --- a/ai-agent/pardus-browser/src/__tests__/llm/prompts.test.ts +++ b/ai-agent/pardus-browser/src/__tests__/llm/prompts.test.ts @@ -10,8 +10,8 @@ describe('Prompts', () => { }); it('should explain browser instances', () => { - assert.ok(SYSTEM_PROMPT.includes('browser instance')); - assert.ok(SYSTEM_PROMPT.includes('isolated')); + assert.ok(SYSTEM_PROMPT.includes('browser_new')); + assert.ok(SYSTEM_PROMPT.includes('instance')); }); it('should explain semantic tree', () => { @@ -37,22 
+37,22 @@ describe('Prompts', () => { }); it('should mention correct tool count', () => { - assert.ok(SYSTEM_PROMPT.includes('19 browser tools')); + assert.ok(SYSTEM_PROMPT.includes('40)')); }); it('should have workflow steps', () => { assert.ok(SYSTEM_PROMPT.includes('browser_new()')); - assert.ok(SYSTEM_PROMPT.includes('browser_navigate()')); - assert.ok(SYSTEM_PROMPT.includes('browser_click()')); + assert.ok(SYSTEM_PROMPT.includes('browser_navigate')); + assert.ok(SYSTEM_PROMPT.includes('browser_click')); }); it('should explain element IDs', () => { - assert.ok(SYSTEM_PROMPT.includes('[#1]')); + assert.ok(SYSTEM_PROMPT.includes('[#N')); assert.ok(SYSTEM_PROMPT.includes('Element IDs')); }); - it('should include best practices', () => { - assert.ok(SYSTEM_PROMPT.includes('Best Practices')); + it('should include key rules', () => { + assert.ok(SYSTEM_PROMPT.includes('Key Rules')); }); }); diff --git a/ai-agent/pardus-browser/src/__tests__/tools/definitions.test.ts b/ai-agent/pardus-browser/src/__tests__/tools/definitions.test.ts index 8ce2bce..edea9d4 100644 --- a/ai-agent/pardus-browser/src/__tests__/tools/definitions.test.ts +++ b/ai-agent/pardus-browser/src/__tests__/tools/definitions.test.ts @@ -4,8 +4,8 @@ import { browserTools, BrowserToolName } from '../../tools/definitions.js'; describe('Tool Definitions', () => { describe('browserTools', () => { - it('should have 19 tools', () => { - assert.strictEqual(browserTools.length, 19); + it('should have 40 tools', () => { + assert.strictEqual(browserTools.length, 40); }); it('should include all expected tools', () => { @@ -247,9 +247,30 @@ describe('Tool Definitions', () => { 'browser_get_state', 'browser_list', 'browser_close', + 'browser_extract_text', + 'browser_extract_links', + 'browser_find', + 'browser_extract_table', + 'browser_extract_metadata', + 'browser_screenshot', + 'browser_select', + 'browser_press_key', + 'browser_hover', + 'browser_tab_new', + 'browser_tab_switch', + 'browser_tab_close', + 
'browser_download', + 'browser_upload', + 'browser_pdf_extract', + 'browser_feed_parse', + 'browser_network_block', + 'browser_network_log', + 'browser_iframe_enter', + 'browser_iframe_exit', + 'browser_diff', ]; - assert.strictEqual(toolNames.length, 19); + assert.strictEqual(toolNames.length, 40); }); }); }); diff --git a/ai-agent/pardus-browser/src/core/BrowserInstance.ts b/ai-agent/pardus-browser/src/core/BrowserInstance.ts index 494236c..13ab858 100644 --- a/ai-agent/pardus-browser/src/core/BrowserInstance.ts +++ b/ai-agent/pardus-browser/src/core/BrowserInstance.ts @@ -19,6 +19,33 @@ import { BrowserGetActionPlanResult, BrowserAutoFillResult, BrowserWaitResult, + BrowserExtractTextResult, + LinkItem, + BrowserExtractLinksResult, + TextMatch, + BrowserFindResult, + BrowserExtractTableResult, + BrowserExtractMetadataResult, + BrowserScreenshotResult, + BrowserSelectResult, + BrowserPressKeyResult, + BrowserHoverResult, + BrowserTabNewResult, + BrowserTabSwitchResult, + BrowserTabCloseResult, + BrowserTabListResult, + TabInfo, + BrowserDownloadResult, + BrowserUploadResult, + BrowserPdfExtractResult, + FeedItem, + BrowserFeedParseResult, + BrowserNetworkBlockResult, + BrowserNetworkLogResult, + BrowserIframeEnterResult, + BrowserIframeExitResult, + PageDiffChange, + BrowserDiffResult, } from './types.js'; interface CDPResponse { @@ -816,6 +843,680 @@ export class BrowserInstance extends EventEmitter { } } + // ── Extraction methods ──────────────────────────────────────── + + async extractText(selector?: string): Promise { + try { + const scopeExpr = selector + ? 
`document.querySelector("${selector.replace(/"/g, '\\"')}")` + : 'document.body'; + + const result = await this.sendCommand( + 'Runtime.evaluate', + { + expression: `(function() { + const root = ${scopeExpr}; + if (!root) return { error: "Element not found: ${selector?.replace(/"/g, '\\"')}" }; + const clone = root.cloneNode(true); + const remove = ['script','style','noscript','svg','iframe','nav','footer','header','aside','[role="navigation"]','[role="banner"]','[role="contentinfo"]','[role="complementary"]','.ad','.ads','.advertisement','.sidebar','.cookie-banner','.popup','.modal']; + for (const sel of remove) { + clone.querySelectorAll(sel).forEach(el => el.remove()); + } + clone.querySelectorAll('*').forEach(el => { + const style = el.getAttribute('style') || ''; + if (style.includes('display:none') || style.includes('display: none') || style.includes('visibility:hidden') || style.includes('visibility: hidden')) { + el.remove(); + } + }); + let text = clone.textContent || ''; + text = text.replace(/\\s+/g, ' ').replace(/\\s+\\n/g, '\\n').trim(); + const wordCount = text.split(/\\s+/).filter(w => w.length > 0).length; + return { text, wordCount }; + })()`, + returnByValue: true, + } + ) as { result?: { value?: { text?: string; wordCount?: number; error?: string } } }; + + const value = result.result?.value; + if (value?.error) { + return { success: false, text: '', word_count: 0, error: value.error }; + } + + return { + success: true, + text: value?.text ?? '', + word_count: value?.wordCount ?? 0, + }; + } catch (error) { + return { + success: false, + text: '', + word_count: 0, + error: error instanceof Error ? error.message : String(error), + }; + } + } + + async extractLinks(filter?: string, domain?: string): Promise { + try { + const result = await this.sendCommand( + 'Runtime.evaluate', + { + expression: `(function() { + const links = Array.from(document.querySelectorAll('a[href]')); + const filterLower = ${JSON.stringify(filter?.toLowerCase() ?? 
'')}; + const domainFilter = ${JSON.stringify(domain?.toLowerCase() ?? '')}; + const mapped = links.map(a => ({ + text: (a.textContent || '').trim().substring(0, 200), + href: a.href, + element_id: a.getAttribute('data-pardus-id') || null, + })).filter(l => { + if (!l.href || l.href === '#' || l.href.startsWith('javascript:')) return false; + if (filterLower && !l.text.toLowerCase().includes(filterLower) && !l.href.toLowerCase().includes(filterLower)) return false; + if (domainFilter) { + try { if (!new URL(l.href).hostname.toLowerCase().includes(domainFilter)) return false; } catch { return false; } + } + return true; + }); + return { links: mapped, count: mapped.length }; + })()`, + returnByValue: true, + } + ) as { result?: { value?: { links?: Array<{ text: string; href: string; element_id?: string | null }>; count?: number } } }; + + const value = result.result?.value; + const links: LinkItem[] = (value?.links ?? []).map(l => ({ + text: l.text, + href: l.href, + ...(l.element_id ? { element_id: l.element_id } : {}), + })); + + return { + success: true, + links, + count: value?.count ?? links.length, + }; + } catch (error) { + return { + success: false, + links: [], + count: 0, + error: error instanceof Error ? error.message : String(error), + }; + } + } + + async find(query: string, caseSensitive = false): Promise { + try { + const result = await this.sendCommand( + 'Runtime.evaluate', + { + expression: `(function() { + const query = ${JSON.stringify(query)}; + const caseSensitive = ${caseSensitive}; + const matches = []; + const walker = document.createTreeWalker(document.body, NodeFilter.SHOW_TEXT, null); + while (walker.nextNode()) { + const node = walker.currentNode; + const text = node.textContent; + if (!text) continue; + const searchIn = caseSensitive ? text : text.toLowerCase(); + const searchFor = caseSensitive ? 
query : query.toLowerCase(); + let idx = searchIn.indexOf(searchFor); + while (idx !== -1) { + const start = Math.max(0, idx - 50); + const end = Math.min(text.length, idx + query.length + 50); + const context = (start > 0 ? '...' : '') + text.substring(start, end) + (end < text.length ? '...' : ''); + const parent = node.parentElement; + matches.push({ + text: text.substring(idx, idx + query.length), + context, + element_id: parent ? parent.getAttribute('data-pardus-id') || null : null, + }); + if (matches.length >= 50) return { matches, count: matches.length, truncated: true }; + idx = searchIn.indexOf(searchFor, idx + 1); + } + } + return { matches, count: matches.length, truncated: false }; + })()`, + returnByValue: true, + } + ) as { result?: { value?: { matches?: TextMatch[]; count?: number; truncated?: boolean } } }; + + const value = result.result?.value; + return { + success: true, + matches: value?.matches ?? [], + count: value?.count ?? 0, + }; + } catch (error) { + return { + success: false, + matches: [], + count: 0, + error: error instanceof Error ? error.message : String(error), + }; + } + } + + async extractTable(selector?: string): Promise { + try { + const tableSelector = selector ? 
`"${selector.replace(/"/g, '\\"')}"` : '"table"'; + const result = await this.sendCommand( + 'Runtime.evaluate', + { + expression: `(function() { + const table = document.querySelector(${tableSelector}); + if (!table) return { error: "Table not found" }; + const headers = []; + const rows = []; + const thCells = table.querySelectorAll('thead th, tr:first-child th'); + thCells.forEach(th => headers.push(th.textContent.trim())); + const bodyRows = table.querySelectorAll('tbody tr, tr'); + bodyRows.forEach((tr, idx) => { + if (idx === 0 && thCells.length > 0 && tr.querySelector('th')) return; + const cells = Array.from(tr.querySelectorAll('td, th')).map(c => c.textContent.trim()); + if (cells.length > 0) rows.push(cells); + }); + if (headers.length === 0 && rows.length > 0) { + rows[0].forEach(() => headers.push('')); + } + return { headers, rows, row_count: rows.length }; + })()`, + returnByValue: true, + } + ) as { result?: { value?: { headers?: string[]; rows?: string[][]; row_count?: number; error?: string } } }; + + const value = result.result?.value; + if (value?.error) { + return { success: false, headers: [], rows: [], row_count: 0, error: value.error }; + } + + return { + success: true, + headers: value?.headers ?? [], + rows: value?.rows ?? [], + row_count: value?.row_count ?? 0, + }; + } catch (error) { + return { + success: false, + headers: [], + rows: [], + row_count: 0, + error: error instanceof Error ? error.message : String(error), + }; + } + } + + async extractMetadata(): Promise { + try { + const result = await this.sendCommand( + 'Runtime.evaluate', + { + expression: `(function() { + const title = document.title || ''; + const descMeta = document.querySelector('meta[name="description"]'); + const description = descMeta ? 
descMeta.getAttribute('content') || '' : ''; + const jsonLd = Array.from(document.querySelectorAll('script[type="application/ld+json"]')) + .map(s => { try { return JSON.parse(s.textContent); } catch { return null; } }) + .filter(v => v !== null); + const og = {}; + document.querySelectorAll('meta[property^="og:"]').forEach(m => { + const prop = m.getAttribute('property'); + const content = m.getAttribute('content'); + if (prop && content) og[prop.replace('og:', '')] = content; + }); + const meta = {}; + document.querySelectorAll('meta[name]').forEach(m => { + const name = m.getAttribute('name'); + const content = m.getAttribute('content'); + if (name && content && name !== 'viewport' && name !== 'charset') { + meta[name] = content; + } + }); + return { title, description, json_ld: jsonLd, open_graph: og, meta }; + })()`, + returnByValue: true, + } + ) as { result?: { value?: BrowserExtractMetadataResult } }; + + const value = result.result?.value; + return { + success: true, + title: value?.title ?? '', + description: value?.description, + json_ld: value?.json_ld ?? [], + open_graph: value?.open_graph ?? {}, + meta: value?.meta ?? {}, + }; + } catch (error) { + return { + success: false, + title: '', + json_ld: [], + open_graph: {}, + meta: {}, + error: error instanceof Error ? error.message : String(error), + }; + } + } + + async screenshot(): Promise { + try { + const result = await this.sendCommand( + 'Page.captureScreenshot', + { format: 'png' }, + this.requestTimeout + ) as { data?: string; mimeType?: string }; + + return { + success: true, + data: result.data ?? '', + mime_type: result.mimeType ?? 'image/png', + }; + } catch (error) { + return { + success: false, + data: '', + mime_type: '', + error: error instanceof Error ? 
error.message : String(error), + }; + } + } + + // ── Interaction methods ─────────────────────────────────────── + + async selectOption(elementId: string, value: string): Promise { + try { + const result = await this.sendCommand( + 'Runtime.evaluate', + { + expression: `(function() { + const el = document.querySelector('[data-pardus-id="${elementId}"]'); + if (!el) return { success: false, error: 'Element not found' }; + if (el.tagName !== 'SELECT') return { success: false, error: 'Element is not a element by its value.', + parameters: { + type: 'object', + properties: { + instance_id: { + type: 'string', + description: 'The browser instance ID', + }, + element_id: { + type: 'string', + description: 'Element ID of the element.', + parameters: { + type: 'object', + properties: { + instance_id: { type: 'string', description: 'The browser instance ID' }, + element_id: { type: 'string', description: 'Element ID of the file input (e.g., "#3")' }, + file_path: { type: 'string', description: 'Path to the file to upload' }, + }, + required: ['instance_id', 'element_id', 'file_path'], + }, + }, + }, + // ── Content tools ─────────────────────────────────────────────── + { + type: 'function', + function: { + name: 'browser_pdf_extract', + description: 'Extract text content from a PDF URL. Returns the text, page count, and any tables or form fields.', + parameters: { + type: 'object', + properties: { + instance_id: { type: 'string', description: 'The browser instance ID' }, + url: { type: 'string', description: 'URL of the PDF file' }, + }, + required: ['instance_id', 'url'], + }, + }, + }, + { + type: 'function', + function: { + name: 'browser_feed_parse', + description: 'Parse an RSS or Atom feed URL. Returns feed title, description, and items. 
Useful for monitoring news sources and blogs.', + parameters: { + type: 'object', + properties: { + instance_id: { type: 'string', description: 'The browser instance ID' }, + url: { type: 'string', description: 'URL of the RSS/Atom feed' }, + }, + required: ['instance_id', 'url'], + }, + }, + }, + // ── Network control tools ─────────────────────────────────────── + { + type: 'function', + function: { + name: 'browser_network_block', + description: 'Block network requests by resource type. Speeds up browsing by skipping images, fonts, stylesheets, or media. Pass empty array to clear all blocks.', + parameters: { + type: 'object', + properties: { + instance_id: { type: 'string', description: 'The browser instance ID' }, + resource_types: { + type: 'array', + items: { type: 'string' }, + description: 'Resource types to block: "image", "stylesheet", "font", "media", "websocket", "manifest". Pass empty array to clear all blocks.', + }, + }, + required: ['instance_id', 'resource_types'], + }, + }, + }, + { + type: 'function', + function: { + name: 'browser_network_log', + description: 'Get a log of all network requests made by the page. Returns URLs, methods, status codes, MIME types, sizes, and durations.', + parameters: { + type: 'object', + properties: { + instance_id: { type: 'string', description: 'The browser instance ID' }, + filter: { type: 'string', description: 'Optional URL pattern to filter requests (substring match)' }, + }, + required: ['instance_id'], + }, + }, + }, + // ── Iframe tools ──────────────────────────────────────────────── + { + type: 'function', + function: { + name: 'browser_iframe_enter', + description: 'Enter an iframe to interact with its content. 
After entering, subsequent commands operate within the iframe.', + parameters: { + type: 'object', + properties: { + instance_id: { type: 'string', description: 'The browser instance ID' }, + element_id: { type: 'string', description: 'Element ID of the iframe to enter (e.g., "#5")' }, + }, + required: ['instance_id', 'element_id'], + }, + }, + }, + { + type: 'function', + function: { + name: 'browser_iframe_exit', + description: 'Exit the current iframe and return to the parent page context.', + parameters: { + type: 'object', + properties: { + instance_id: { type: 'string', description: 'The browser instance ID' }, + }, + required: ['instance_id'], + }, + }, + }, + // ── Page diff tool ────────────────────────────────────────────── + { + type: 'function', + function: { + name: 'browser_diff', + description: 'Compare the current page state against the last saved snapshot. Returns changes (added, removed, modified elements). Use to detect what changed after a click, scroll, or wait.', + parameters: { + type: 'object', + properties: { + instance_id: { type: 'string', description: 'The browser instance ID' }, + }, + required: ['instance_id'], + }, + }, + }, ]; -export type BrowserToolName = +export type BrowserToolName = | 'browser_new' | 'browser_navigate' | 'browser_click' @@ -525,4 +928,25 @@ export type BrowserToolName = | 'browser_wait' | 'browser_get_state' | 'browser_list' - | 'browser_close'; + | 'browser_close' + | 'browser_extract_text' + | 'browser_extract_links' + | 'browser_find' + | 'browser_extract_table' + | 'browser_extract_metadata' + | 'browser_screenshot' + | 'browser_select' + | 'browser_press_key' + | 'browser_hover' + | 'browser_tab_new' + | 'browser_tab_switch' + | 'browser_tab_close' + | 'browser_download' + | 'browser_upload' + | 'browser_pdf_extract' + | 'browser_feed_parse' + | 'browser_network_block' + | 'browser_network_log' + | 'browser_iframe_enter' + | 'browser_iframe_exit' + | 'browser_diff'; diff --git 
a/ai-agent/pardus-browser/src/tools/executor.ts b/ai-agent/pardus-browser/src/tools/executor.ts index 1e9dbb8..15be9d4 100644 --- a/ai-agent/pardus-browser/src/tools/executor.ts +++ b/ai-agent/pardus-browser/src/tools/executor.ts @@ -39,6 +39,19 @@ interface ToolCallArgs { min_count?: number; timeout_ms?: number; interval_ms?: number; + // Extraction args + filter?: string; + query?: string; + case_sensitive?: boolean; + // Interaction args + target_id?: string; + // Download/Upload args + filename?: string; + file_path?: string; + // PDF args + // Feed args + // Network args + resource_types?: string[]; } /** @@ -323,6 +336,48 @@ export class ToolExecutor { return this.handleList(); case 'browser_get_state': return this.handleGetState(typedArgs); + case 'browser_extract_text': + return this.handleExtractText(typedArgs); + case 'browser_extract_links': + return this.handleExtractLinks(typedArgs); + case 'browser_find': + return this.handleFind(typedArgs); + case 'browser_extract_table': + return this.handleExtractTable(typedArgs); + case 'browser_extract_metadata': + return this.handleExtractMetadata(typedArgs); + case 'browser_screenshot': + return this.handleScreenshot(typedArgs); + case 'browser_select': + return this.handleSelect(typedArgs); + case 'browser_press_key': + return this.handlePressKey(typedArgs); + case 'browser_hover': + return this.handleHover(typedArgs); + case 'browser_tab_new': + return this.handleTabNew(typedArgs); + case 'browser_tab_switch': + return this.handleTabSwitch(typedArgs); + case 'browser_tab_close': + return this.handleTabClose(typedArgs); + case 'browser_download': + return this.handleDownload(typedArgs); + case 'browser_upload': + return this.handleUpload(typedArgs); + case 'browser_pdf_extract': + return this.handlePdfExtract(typedArgs); + case 'browser_feed_parse': + return this.handleFeedParse(typedArgs); + case 'browser_network_block': + return this.handleNetworkBlock(typedArgs); + case 'browser_network_log': + return 
this.handleNetworkLog(typedArgs); + case 'browser_iframe_enter': + return this.handleIframeEnter(typedArgs); + case 'browser_iframe_exit': + return this.handleIframeExit(typedArgs); + case 'browser_diff': + return this.handleDiff(typedArgs); default: return { success: false, @@ -1165,6 +1220,612 @@ export class ToolExecutor { } } + // ── Extraction handlers ──────────────────────────────────────── + + private async handleExtractText(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.extractText(args.selector); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Text extraction failed' }; + } + + const content = `## Extracted Text\n\n` + + `- **Word Count**: ${result.word_count}\n` + + `- **Scope**: ${args.selector || 'full page'}\n\n` + + `---\n\n${result.text}`; + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + private async handleExtractLinks(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.extractLinks(args.filter, args.domain); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Link extraction failed' }; + } + + const filterNote = args.filter ? ` (filtered: "${args.filter}")` : ''; + const domainNote = args.domain ? 
` (domain: ${args.domain})` : ''; + const linkLines = result.links.map((l, i) => { + const id = l.element_id ? ` [#${l.element_id}]` : ''; + return `${i + 1}. [${l.text}](${l.href})${id}`; + }).join('\n'); + + const content = `## Links (${result.count})${filterNote}${domainNote}\n\n${linkLines || 'No links found.'}`; + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + private async handleFind(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + if (!args.query) { + return { success: false, content: '', error: 'Missing query' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.find(args.query, args.case_sensitive); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Find failed' }; + } + + const matchLines = result.matches.map((m, i) => { + const id = m.element_id ? ` [element #${m.element_id}]` : ''; + return `${i + 1}. "${m.text}"${id}\n ...${m.context}...`; + }).join('\n\n'); + + const content = `## Search Results\n\n` + + `- **Query**: "${args.query}"\n` + + `- **Matches**: ${result.count}\n\n` + + `---\n\n${matchLines || 'No matches found.'}`; + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? 
error.message : String(error) }; + } + } + + private async handleExtractTable(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.extractTable(args.selector); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Table extraction failed' }; + } + + const headerLine = `| ${result.headers.join(' | ')} |`; + const separatorLine = `| ${result.headers.map(() => '---').join(' | ')} |`; + const dataLines = result.rows.map(row => `| ${row.join(' | ')} |`).join('\n'); + + const content = `## Table (${result.row_count} rows)\n\n` + + `${headerLine}\n${separatorLine}\n${dataLines}`; + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? 
error.message : String(error) }; + } + } + + private async handleExtractMetadata(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.extractMetadata(); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Metadata extraction failed' }; + } + + let content = `## Page Metadata\n\n` + + `- **Title**: ${result.title}\n`; + + if (result.description) { + content += `- **Description**: ${result.description}\n`; + } + + if (Object.keys(result.open_graph).length > 0) { + content += `\n### Open Graph\n\n`; + for (const [key, val] of Object.entries(result.open_graph)) { + content += `- **og:${key}**: ${val}\n`; + } + } + + if (result.json_ld.length > 0) { + content += `\n### JSON-LD\n\n\`\`\`json\n${JSON.stringify(result.json_ld, null, 2)}\n\`\`\`\n`; + } + + if (Object.keys(result.meta).length > 0) { + content += `\n### Meta Tags\n\n`; + for (const [key, val] of Object.entries(result.meta)) { + content += `- **${key}**: ${val}\n`; + } + } + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? 
error.message : String(error) }; + } + } + + private async handleScreenshot(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.screenshot(); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Screenshot failed' }; + } + + const content = `## Screenshot\n\n` + + `- **MIME Type**: ${result.mime_type}\n` + + `- **Data Length**: ${result.data.length} bytes (base64)\n\n` + + `![Screenshot](data:${result.mime_type};base64,${result.data.substring(0, 100)}...)`; + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + // ── Interaction handlers ─────────────────────────────────────── + + private async handleSelect(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + if (!args.element_id) { + return { success: false, content: '', error: 'Missing element_id' }; + } + if (!args.value) { + return { success: false, content: '', error: 'Missing value' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.selectOption(args.element_id, args.value); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Select failed' }; + } + + return { + success: true, + content: `Selected "${result.selected_value}" in dropdown ${args.element_id}`, + }; + } catch (error) { + return { success: false, content: '', error: 
error instanceof Error ? error.message : String(error) }; + } + } + + private async handlePressKey(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + if (!args.key) { + return { success: false, content: '', error: 'Missing key' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.pressKey(args.key); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Press key failed' }; + } + + return { + success: true, + content: `Pressed key: ${args.key}`, + }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + private async handleHover(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + if (!args.element_id) { + return { success: false, content: '', error: 'Missing element_id' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.hover(args.element_id); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Hover failed' }; + } + + return { + success: true, + content: `Hovered over element ${args.element_id}`, + }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? 
error.message : String(error) }; + } + } + + // ── Tab management handlers ──────────────────────────────────── + + private async handleTabNew(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + if (!args.url) { + return { success: false, content: '', error: 'Missing url' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.newTab(args.url); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Failed to create tab' }; + } + + return { + success: true, + content: `## New Tab\n\n- **Target ID**: ${result.target_id}\n- **URL**: ${args.url}`, + }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + private async handleTabSwitch(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + if (!args.target_id) { + return { success: false, content: '', error: 'Missing target_id' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.switchTab(args.target_id); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Failed to switch tab' }; + } + + return { + success: true, + content: `Switched to tab ${args.target_id}`, + }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? 
error.message : String(error) }; + } + } + + private async handleTabClose(args: ToolCallArgs): Promise { + if (!args.instance_id) { + return { success: false, content: '', error: 'Missing instance_id' }; + } + if (!args.target_id) { + return { success: false, content: '', error: 'Missing target_id' }; + } + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) { + return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + } + + try { + const result = await instance.closeTab(args.target_id); + + if (!result.success) { + return { success: false, content: '', error: result.error || 'Failed to close tab' }; + } + + return { + success: true, + content: `Closed tab ${args.target_id}`, + }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + // ── Download/Upload handlers ──────────────────────────────────── + + private async handleDownload(args: ToolCallArgs): Promise { + if (!args.instance_id) return { success: false, content: '', error: 'Missing instance_id' }; + if (!args.url) return { success: false, content: '', error: 'Missing url' }; + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + + try { + const result = await instance.download(args.url, args.filename); + if (!result.success) return { success: false, content: '', error: result.error || 'Download failed' }; + + const content = `## Download Complete\n\n` + + `- **URL**: ${args.url}\n` + + `- **Path**: ${result.path}\n` + + `- **Size**: ${result.size_bytes} bytes (${(result.size_bytes / 1024).toFixed(1)} KB)\n` + + (result.mime_type ? `- **MIME Type**: ${result.mime_type}\n` : ''); + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? 
error.message : String(error) }; + } + } + + private async handleUpload(args: ToolCallArgs): Promise { + if (!args.instance_id) return { success: false, content: '', error: 'Missing instance_id' }; + if (!args.element_id) return { success: false, content: '', error: 'Missing element_id' }; + if (!args.file_path) return { success: false, content: '', error: 'Missing file_path' }; + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + + try { + const result = await instance.upload(args.element_id, args.file_path); + if (!result.success) return { success: false, content: '', error: result.error || 'Upload failed' }; + + return { success: true, content: `Uploaded "${args.file_path}" to element ${args.element_id}` }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + // ── Content handlers ──────────────────────────────────────────── + + private async handlePdfExtract(args: ToolCallArgs): Promise { + if (!args.instance_id) return { success: false, content: '', error: 'Missing instance_id' }; + if (!args.url) return { success: false, content: '', error: 'Missing url' }; + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + + try { + const result = await instance.pdfExtract(args.url); + if (!result.success) return { success: false, content: '', error: result.error || 'PDF extraction failed' }; + + let content = `## PDF Extract\n\n` + + `- **URL**: ${args.url}\n` + + `- **Pages**: ${result.page_count}\n`; + + if (result.forms && result.forms.length > 0) { + content += `- **Form Fields**: ${result.forms.length}\n`; + } + + content += `\n---\n\n${result.text.substring(0, 8000)}`; + if (result.text.length > 8000) content += 
'\n\n... [truncated]'; + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + private async handleFeedParse(args: ToolCallArgs): Promise { + if (!args.instance_id) return { success: false, content: '', error: 'Missing instance_id' }; + if (!args.url) return { success: false, content: '', error: 'Missing url' }; + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + + try { + const result = await instance.feedParse(args.url); + if (!result.success) return { success: false, content: '', error: result.error || 'Feed parse failed' }; + + let content = `## ${result.feed_type.toUpperCase()} Feed\n\n` + + `- **Title**: ${result.title}\n` + + (result.description ? `- **Description**: ${result.description}\n` : '') + + `- **Items**: ${result.item_count}\n\n`; + + for (const item of result.items.slice(0, 20)) { + content += `### ${item.title}\n`; + content += `- **Link**: ${item.link}\n`; + if (item.pub_date) content += `- **Date**: ${item.pub_date}\n`; + if (item.author) content += `- **Author**: ${item.author}\n`; + if (item.description) content += `- **Summary**: ${item.description.substring(0, 200)}${item.description.length > 200 ? '...' : ''}\n`; + content += '\n'; + } + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? 
error.message : String(error) }; + } + } + + // ── Network control handlers ──────────────────────────────────── + + private async handleNetworkBlock(args: ToolCallArgs): Promise { + if (!args.instance_id) return { success: false, content: '', error: 'Missing instance_id' }; + if (!args.resource_types) return { success: false, content: '', error: 'Missing resource_types' }; + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + + try { + const result = await instance.networkBlock(args.resource_types); + if (!result.success) return { success: false, content: '', error: result.error || 'Network block failed' }; + + const content = args.resource_types.length === 0 + ? '## Network Block Cleared\n\nAll resource blocks removed.' + : `## Network Block Set\n\nBlocked resource types: ${result.blocked_types.join(', ')}`; + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + private async handleNetworkLog(args: ToolCallArgs): Promise { + if (!args.instance_id) return { success: false, content: '', error: 'Missing instance_id' }; + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + + try { + const result = await instance.networkLog(args.filter); + if (!result.success) return { success: false, content: '', error: result.error || 'Network log failed' }; + + let content = `## Network Log (${result.count} requests)\n\n`; + + for (const req of result.requests.slice(0, 30)) { + content += `- **${req.method}** ${req.status} ${req.url.substring(0, 100)}${req.url.length > 100 ? '...' : ''} (${req.size_bytes} bytes, ${req.duration_ms}ms)\n`; + } + + if (result.count > 30) content += `\n... 
and ${result.count - 30} more requests`; + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + // ── Iframe handlers ───────────────────────────────────────────── + + private async handleIframeEnter(args: ToolCallArgs): Promise { + if (!args.instance_id) return { success: false, content: '', error: 'Missing instance_id' }; + if (!args.element_id) return { success: false, content: '', error: 'Missing element_id' }; + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + + try { + const result = await instance.iframeEnter(args.element_id); + if (!result.success) return { success: false, content: '', error: result.error || 'Failed to enter iframe' }; + + return { success: true, content: `Entered iframe ${args.element_id}. Subsequent commands now operate within the iframe.` }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? error.message : String(error) }; + } + } + + private async handleIframeExit(args: ToolCallArgs): Promise { + if (!args.instance_id) return { success: false, content: '', error: 'Missing instance_id' }; + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + + try { + const result = await instance.iframeExit(); + if (!result.success) return { success: false, content: '', error: result.error || 'Failed to exit iframe' }; + + return { success: true, content: 'Returned to parent page context.' }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? 
error.message : String(error) }; + } + } + + // ── Page diff handler ─────────────────────────────────────────── + + private async handleDiff(args: ToolCallArgs): Promise { + if (!args.instance_id) return { success: false, content: '', error: 'Missing instance_id' }; + + const instance = this.browserManager.getInstance(args.instance_id); + if (!instance) return { success: false, content: '', error: `Browser instance "${args.instance_id}" not found` }; + + try { + const result = await instance.diff(); + if (!result.success) return { success: false, content: '', error: result.error || 'Diff failed' }; + + let content = `## Page Diff (${result.change_count} changes)\n\n`; + + if (result.summary) content += `${result.summary}\n\n`; + + for (const change of result.changes.slice(0, 30)) { + const icon = change.type === 'added' ? '+' : change.type === 'removed' ? '-' : '~'; + content += `${icon} **[${change.type}]** ${change.selector}\n`; + if (change.type === 'added' && change.text) content += ` "${change.text.substring(0, 100)}"\n`; + if (change.type === 'removed' && change.text) content += ` "${change.text.substring(0, 100)}"\n`; + if (change.type === 'modified' && change.old_text && change.new_text) { + content += ` was: "${change.old_text.substring(0, 80)}"\n`; + content += ` now: "${change.new_text.substring(0, 80)}"\n`; + } + } + + if (result.change_count > 30) content += `\n... and ${result.change_count - 30} more changes`; + + return { success: true, content }; + } catch (error) { + return { success: false, content: '', error: error instanceof Error ? 
error.message : String(error) }; + } + } + private sleep(ms: number): Promise { return new Promise(resolve => setTimeout(resolve, ms)); } diff --git a/crates/pardus-cdp/Cargo.toml b/crates/pardus-cdp/Cargo.toml index 3f4a899..8fb4859 100644 --- a/crates/pardus-cdp/Cargo.toml +++ b/crates/pardus-cdp/Cargo.toml @@ -21,6 +21,7 @@ anyhow = { workspace = true } url = "2" parking_lot = { workspace = true } base64 = { workspace = true } +chrono = { workspace = true } [features] default = [] diff --git a/crates/pardus-cdp/src/domain/mod.rs b/crates/pardus-cdp/src/domain/mod.rs index d3fd03a..86e905b 100644 --- a/crates/pardus-cdp/src/domain/mod.rs +++ b/crates/pardus-cdp/src/domain/mod.rs @@ -103,7 +103,10 @@ impl DomainContext { /// !Send types (scraper::Html in Page) that cannot be held across await /// points in CDP handlers which must be Send. pub async fn navigate(&self, target_id: &str, url: &str) -> anyhow::Result<()> { - let page = pardus_core::Page::from_url(&self.app, url).await?; + let page = match pardus_core::Page::from_url_with_js(&self.app, url, 3000).await { + Ok(p) => p, + Err(_) => pardus_core::Page::from_url(&self.app, url).await?, + }; let frame_tree_json = page.frame_tree.as_ref() .and_then(|ft| serde_json::to_string(ft).ok()); let final_url = page.url.clone(); @@ -114,7 +117,7 @@ impl DomainContext { url: final_url, html: Some(html_str), title, - js_enabled: false, + js_enabled: true, frame_tree_json, }); Ok(()) diff --git a/crates/pardus-cdp/src/domain/pardus_ext.rs b/crates/pardus-cdp/src/domain/pardus_ext.rs index f006eab..c880d92 100644 --- a/crates/pardus-cdp/src/domain/pardus_ext.rs +++ b/crates/pardus-cdp/src/domain/pardus_ext.rs @@ -78,7 +78,18 @@ impl CdpDomainHandler for PardusDomain { let value = params["value"].as_str().unwrap_or("").to_string(); let fields_param = params.get("fields").cloned(); - let result = handle_interact(&action, &selector, &value, target_id, &fields_param, ctx).await; + let result = if !action.is_empty() { + let 
session_id = session.session_id.clone(); + emit_action_started(ctx, &action, &selector, &value, &session_id); + + let res = handle_interact(&action, &selector, &value, target_id, &fields_param, ctx).await; + + emit_action_completed(ctx, &action, &selector, &res, &session_id); + res + } else { + handle_interact(&action, &selector, &value, target_id, &fields_param, ctx).await + }; + HandleResult::Success(result) } "getNavigationGraph" => { @@ -378,3 +389,40 @@ fn collect_interactive_nodes(node: &pardus_core::SemanticNode, out: &mut Vec { let session = session.lock().await; let domain = event.method.split('.').next().unwrap_or(""); - if session.is_domain_enabled(domain) || domain == "Target" { + if session.is_domain_enabled(domain) || domain == "Target" || domain == "Pardus" { let json = serde_json::to_string(&event).unwrap_or_default(); drop(session); let msg = tungstenite::Message::Text(json.into()); diff --git a/crates/pardus-cli/src/commands/interact.rs b/crates/pardus-cli/src/commands/interact.rs index e073255..5fa02a4 100644 --- a/crates/pardus-cli/src/commands/interact.rs +++ b/crates/pardus-cli/src/commands/interact.rs @@ -114,6 +114,7 @@ fn output_result(result: &InteractionResult, format: &OutputFormatArg) { &tree, None, None, + new_page.redirect_chain.as_ref(), ) .unwrap_or_default(); println!("{}", json); @@ -166,6 +167,7 @@ fn output_result(result: &InteractionResult, format: &OutputFormatArg) { &tree, None, None, + new_page.redirect_chain.as_ref(), ) .unwrap_or_default(); println!("{}", json); diff --git a/crates/pardus-cli/src/commands/map.rs b/crates/pardus-cli/src/commands/map.rs index 678c233..fbb911b 100644 --- a/crates/pardus-cli/src/commands/map.rs +++ b/crates/pardus-cli/src/commands/map.rs @@ -31,10 +31,11 @@ pub async fn run_with_config( max_depth: depth, max_pages, delay_ms: delay, - verify_transitions: !skip_verify, discover_pagination: pagination, discover_hash_nav: hash_nav, discover_forms: false, + store_full_trees: true, + concurrency: 4, 
proxy: proxy_config, }; diff --git a/crates/pardus-cli/src/commands/navigate.rs b/crates/pardus-cli/src/commands/navigate.rs index 10acb1f..9accb68 100644 --- a/crates/pardus-cli/src/commands/navigate.rs +++ b/crates/pardus-cli/src/commands/navigate.rs @@ -46,6 +46,23 @@ pub async fn run_with_config( let page = browser.current_page().ok_or_else(|| anyhow::anyhow!("no page loaded"))?; + // Show redirect chain info + if let Some(ref chain) = page.redirect_chain { + if !chain.hops.is_empty() { + let original = chain.original_url().unwrap_or(&page.url); + println!( + " redirected {} -> {} ({} hop{})", + original, + page.url, + chain.hops.len(), + if chain.hops.len() == 1 { "" } else { "s" } + ); + for hop in &chain.hops { + println!(" {} {} -> {}", hop.status, hop.from, hop.to); + } + } + } + if network_log { page.discover_subresources(&net_log); pardus_core::Page::fetch_subresources(&http_client, &net_log).await; @@ -130,6 +147,7 @@ pub async fn run_with_config( &tree, nav_graph.as_ref(), network.as_ref(), + page.redirect_chain.as_ref(), )?; println!("{}", json); return Ok(()); diff --git a/crates/pardus-cli/src/commands/repl.rs b/crates/pardus-cli/src/commands/repl.rs index 0df93a9..a6efa1d 100644 --- a/crates/pardus-cli/src/commands/repl.rs +++ b/crates/pardus-cli/src/commands/repl.rs @@ -372,6 +372,7 @@ fn print_tree(browser: &Browser, format: &OutputFormatArg) { &tree, None, None, + page.redirect_chain.as_ref(), ) .unwrap_or_default(); println!("{}", json); @@ -415,6 +416,7 @@ fn print_interaction_result( &tree, None, None, + new_page.redirect_chain.as_ref(), ) .unwrap_or_default(); println!("{}", json); @@ -467,6 +469,7 @@ fn print_interaction_result( &tree, None, None, + new_page.redirect_chain.as_ref(), ) .unwrap_or_default(); println!("{}", json); diff --git a/crates/pardus-core/src/app.rs b/crates/pardus-core/src/app.rs index 270ab3d..73b89f3 100644 --- a/crates/pardus-core/src/app.rs +++ b/crates/pardus-core/src/app.rs @@ -60,9 +60,10 @@ fn 
chrome_default_headers() -> rquest::header::HeaderMap { pub fn build_http_client(config: &BrowserConfig) -> anyhow::Result { let mut client_builder = rquest::Client::builder() .emulation(Emulation::Chrome131) - .user_agent(&config.user_agent) .timeout(std::time::Duration::from_millis(config.timeout_ms as u64)) - .default_headers(chrome_default_headers()); + .default_headers(chrome_default_headers()) + .user_agent(&config.user_agent) + .cert_verification(false); // BoringSSL doesn't load system certs; skip verify for headless use // Sandbox: disable cookie store for ephemeral sessions if !config.sandbox.ephemeral_session { @@ -83,9 +84,9 @@ pub fn build_http_client(config: &BrowserConfig) -> anyhow::Result { pub navigation_graph: Option<&'a NavigationGraph>, #[serde(skip_serializing_if = "Option::is_none")] pub network_log: Option<&'a pardus_debug::formatter::NetworkLogJson>, + #[serde(skip_serializing_if = "Option::is_none")] + pub redirect_chain: Option<&'a RedirectChain>, } /// Format the full result as JSON. @@ -21,6 +24,7 @@ pub fn format_json( tree: &SemanticTree, nav_graph: Option<&NavigationGraph>, network_log: Option<&pardus_debug::formatter::NetworkLogJson>, + redirect_chain: Option<&RedirectChain>, ) -> anyhow::Result { let result = JsonResult { url: url.to_string(), @@ -29,6 +33,7 @@ pub fn format_json( stats: &tree.stats, navigation_graph: nav_graph, network_log, + redirect_chain, }; Ok(serde_json::to_string_pretty(&result)?) 
} diff --git a/crates/pardus-core/src/page.rs b/crates/pardus-core/src/page.rs index feb25ad..2cadb7d 100644 --- a/crates/pardus-core/src/page.rs +++ b/crates/pardus-core/src/page.rs @@ -14,6 +14,40 @@ use crate::interact::element::{ElementHandle, element_to_handle}; use pardus_debug::{NetworkRecord, ResourceType, Initiator}; +// --------------------------------------------------------------------------- +// Redirect chain types +// --------------------------------------------------------------------------- + +/// One hop in an HTTP redirect chain. +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct RedirectHop { + /// The URL that issued the redirect. + pub from: String, + /// The target URL from the Location header. + pub to: String, + /// The HTTP status code (301, 302, 303, 307, 308). + pub status: u16, +} + +/// The full redirect chain captured during an HTTP request. +/// +/// Ordered from first redirect to last. Empty when no redirects occurred. +#[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] +pub struct RedirectChain { + pub hops: Vec, +} + +impl RedirectChain { + pub fn is_empty(&self) -> bool { + self.hops.is_empty() + } + + /// The original URL before any redirects. + pub fn original_url(&self) -> Option<&str> { + self.hops.first().map(|h| h.from.as_str()) + } +} + /// Serializable snapshot of a page's state. /// /// Used to transfer page data over the wire (e.g., via CDP WebSocket) @@ -25,6 +59,8 @@ pub struct PageSnapshot { pub content_type: Option, pub title: Option, pub html: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub redirect_chain: Option, } pub struct Page { @@ -41,6 +77,8 @@ pub struct Page { /// Pre-built semantic tree for non-HTML content (e.g., PDFs). /// When `Some`, `semantic_tree()` returns this instead of parsing HTML. pub cached_tree: Option, + /// HTTP redirect chain (empty / None when no redirects occurred). 
+ pub redirect_chain: Option, } impl Page { @@ -138,7 +176,7 @@ impl Page { let start = Instant::now(); let retry_config = app.config.read().retry.clone(); - let response = Self::fetch_with_retry(app, &effective_url, &req_ctx.headers, &retry_config).await?; + let (response, redirect_hops) = Self::fetch_with_retry(app, &effective_url, &req_ctx.headers, &retry_config).await?; let http_version = format_http_version(response.version()); let status = response.status().as_u16(); @@ -300,6 +338,11 @@ impl Page { csp: csp_policy, frame_tree, cached_tree: None, + redirect_chain: if redirect_hops.is_empty() { + None + } else { + Some(RedirectChain { hops: redirect_hops }) + }, }) } @@ -325,6 +368,7 @@ impl Page { csp: None, frame_tree: None, cached_tree: None, + redirect_chain: None, } } @@ -344,19 +388,31 @@ impl Page { csp: None, frame_tree: None, cached_tree: None, + redirect_chain: None, }) } /// Execute HTTP request with configurable retry and exponential backoff. + /// + /// Returns the response and any HTTP redirect hops that were captured. 
async fn fetch_with_retry( app: &Arc, url: &str, extra_headers: &std::collections::HashMap, retry_config: &crate::config::RetryConfig, - ) -> anyhow::Result { + ) -> anyhow::Result<(rquest::Response, Vec)> { + let max_redirects = app.config.read().max_redirects; + let redirect_hops: Arc>> = + Arc::new(std::sync::Mutex::new(Vec::new())); let mut attempt = 0u32; loop { + // Clear stale hops from previous retry attempts + redirect_hops.lock().unwrap().clear(); + + let hops_clone = redirect_hops.clone(); + let max = max_redirects; + let mut request_builder = app.http_client.get(url); // Apply interceptor-modified headers @@ -364,6 +420,24 @@ impl Page { request_builder = request_builder.header(name.as_str(), value.as_str()); } + // Set custom redirect policy to capture each hop + request_builder = request_builder.redirect( + rquest::redirect::Policy::custom(move |attempt| { + if attempt.previous().len() >= max { + return attempt.error("too many redirects"); + } + let from = attempt.previous().last() + .map(|u| u.to_string()) + .unwrap_or_default(); + let to = attempt.url().to_string(); + let status = attempt.status().as_u16(); + if let Ok(mut hops) = hops_clone.lock() { + hops.push(RedirectHop { from, to, status }); + } + attempt.follow() + }) + ); + // Build the request so we can retry it let request = request_builder .build() @@ -384,7 +458,11 @@ impl Page { tokio::time::sleep(std::time::Duration::from_millis(delay)).await; continue; } - return Ok(response); + // Extract collected redirect hops + let hops = Arc::try_unwrap(redirect_hops) + .map(|m| m.into_inner().unwrap_or_default()) + .unwrap_or_default(); + return Ok((response, hops)); } Err(e) if (e.is_timeout() || e.is_connect()) && attempt < retry_config.max_retries => { attempt += 1; @@ -420,6 +498,7 @@ impl Page { csp: None, frame_tree: None, cached_tree: Some(tree), + redirect_chain: None, }) } @@ -442,6 +521,7 @@ impl Page { csp: None, frame_tree: None, cached_tree: Some(tree), + redirect_chain: None, }) } 
@@ -522,6 +602,7 @@ impl Page { csp: None, frame_tree: None, cached_tree: None, + redirect_chain: None, } } @@ -538,6 +619,7 @@ impl Page { csp: None, frame_tree: Some(frame_tree), cached_tree: None, + redirect_chain: None, } } @@ -560,6 +642,7 @@ impl Page { csp: None, frame_tree: Some(frame_tree), cached_tree: None, + redirect_chain: None, } } @@ -704,6 +787,7 @@ impl Page { content_type: self.content_type.clone(), title: self.title(), html: self.html.html(), + redirect_chain: self.redirect_chain.clone(), } } @@ -721,6 +805,7 @@ impl Page { csp: self.csp.clone(), frame_tree: None, cached_tree: self.cached_tree.clone(), + redirect_chain: self.redirect_chain.clone(), } } diff --git a/crates/pardus-core/src/page_analysis.rs b/crates/pardus-core/src/page_analysis.rs new file mode 100644 index 0000000..760ff01 --- /dev/null +++ b/crates/pardus-core/src/page_analysis.rs @@ -0,0 +1,20 @@ +use scraper::Html; + +use crate::navigation::graph::NavigationGraph; +use crate::semantic::tree::SemanticTree; + +pub struct PageAnalysis { + pub semantic_tree: SemanticTree, + pub navigation_graph: NavigationGraph, +} + +impl PageAnalysis { + pub fn build(html: &Html, page_url: &str) -> Self { + let semantic_tree = SemanticTree::build(html, page_url); + let navigation_graph = NavigationGraph::build(html, page_url); + Self { + semantic_tree, + navigation_graph, + } + } +} diff --git a/crates/pardus-core/src/semantic/tree.rs.bak b/crates/pardus-core/src/semantic/tree.rs.bak deleted file mode 100644 index 7e13086..0000000 --- a/crates/pardus-core/src/semantic/tree.rs.bak +++ /dev/null @@ -1,434 +0,0 @@ -use scraper::{ElementRef, Html, Selector}; -use std::fmt; -use url::Url; - -use once_cell::sync::Lazy; - -// Pre-compiled selectors for performance and safety -static BODY_SELECTOR: Lazy = Lazy::new(|| { - Selector::parse("body").expect("'body' is always a valid CSS selector") -}); - -// --------------------------------------------------------------------------- -// Semantic Tree -// 
--------------------------------------------------------------------------- - -/// The semantic tree extracted from an HTML page. -#[derive(Debug)] -pub struct SemanticTree { - pub root: SemanticNode, - pub stats: TreeStats, -} - -/// A node in the semantic tree. -#[derive(Debug, Clone)] -pub struct SemanticNode { - pub role: SemanticRole, - pub name: Option, - pub tag: String, - pub is_interactive: bool, - pub is_disabled: bool, - pub href: Option, - pub action: Option, - pub children: Vec, -} - -/// Statistics about the semantic tree. -#[derive(Debug, Default)] -pub struct TreeStats { - pub landmarks: usize, - pub links: usize, - pub headings: usize, - pub actions: usize, - pub forms: usize, - pub images: usize, - pub total_nodes: usize, -} - -// --------------------------------------------------------------------------- -// Semantic Role -// --------------------------------------------------------------------------- - -#[derive(Debug, Clone, PartialEq)] -pub enum SemanticRole { - Document, - Banner, - Navigation, - Main, - ContentInfo, - Complementary, - Region, - Form, - Search, - Article, - Heading { level: u8 }, - Link, - Button, - TextBox, - Checkbox, - Radio, - Combobox, - List, - ListItem, - Table, - Row, - Cell, - ColumnHeader, - RowHeader, - Image, - Dialog, - Generic, - StaticText, - Other(String), -} - -impl fmt::Display for SemanticRole { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - Self::Heading { level } => write!(f, "heading (h{level})"), - Self::Other(s) => write!(f, "{s}"), - _ => write!(f, "{}", self.role_str()), - } - } -} - -impl SemanticRole { - pub fn role_str(&self) -> &str { - match self { - Self::Document => "document", - Self::Banner => "banner", - Self::Navigation => "navigation", - Self::Main => "main", - Self::ContentInfo => "contentinfo", - Self::Complementary => "complementary", - Self::Region => "region", - Self::Form => "form", - Self::Search => "search", - Self::Article => "article", - 
Self::Heading { .. } => "heading", - Self::Link => "link", - Self::Button => "button", - Self::TextBox => "textbox", - Self::Checkbox => "checkbox", - Self::Radio => "radio", - Self::Combobox => "combobox", - Self::List => "list", - Self::ListItem => "listitem", - Self::Table => "table", - Self::Row => "row", - Self::Cell => "cell", - Self::ColumnHeader => "columnheader", - Self::RowHeader => "rowheader", - Self::Image => "img", - Self::Dialog => "dialog", - Self::Generic => "generic", - Self::StaticText => "static", - Self::Other(s) => s.as_str(), - } - } - - pub fn is_landmark(&self) -> bool { - matches!( - self, - Self::Banner - | Self::Navigation - | Self::Main - | Self::ContentInfo - | Self::Complementary - | Self::Region - | Self::Form - | Self::Search - | Self::Article - ) - } - - pub fn is_heading(&self) -> bool { - matches!(self, Self::Heading { .. }) - } -} - -// --------------------------------------------------------------------------- -// Semantic Tree Builder -// --------------------------------------------------------------------------- - -impl SemanticTree { - /// Build a semantic tree from an HTML document. 
- pub fn from_html(html: &Html, base_url: &str) -> Self { - let mut stats = TreeStats::default(); - let mut builder = TreeBuilder { - base_url, - stats: &mut stats, - }; - let root = builder.build_from_html(html); - stats.total_nodes = count_nodes(&root); - Self { root, stats } - } -} - -struct TreeBuilder<'a> { - base_url: &'a str, - stats: &'static mut TreeStats, -} - -impl<'a> TreeBuilder<'a> { - fn build_from_html(&mut self, html: &Html) -> SemanticNode { - let body = html.select(&*BODY_SELECTOR).next(); - - let root = SemanticNode { - role: SemanticRole::Document, - name: None, - tag: "document".to_string(), - is_interactive: false, - is_disabled: false, - href: None, - action: None, - children: if let Some(body) = body { - self.build_children(&body) - } else { - Vec::new() - }, - }; - - root - } - - fn build_children(&mut self, el: &ElementRef) -> Vec { - el.children() - .filter_map(|child| self.build_node(child)) - .collect() - } - - fn build_node(&mut self, node: scraper::NodeRef) -> Option { - if let scraper::Node::Element(elem) = node.value() { - let tag = elem.name().local.as_ref().to_lowercase(); - let (role, action) = infer_role(&tag, elem); - let is_interactive = is_interactive_element(&tag, elem); - let is_disabled = elem.attr("disabled").is_some() - || elem.attr("aria-disabled") == Some("true"); - - let name = extract_name(elem); - let href = elem.attr("href").map(|s| { - if s.starts_with("http") || s.starts_with("//") { - s.to_string() - } else { - resolve_url(self.base_url, s) - } - }); - - if role.is_landmark() { - self.stats.landmarks += 1; - } - if matches!(role, SemanticRole::Link) { - self.stats.links += 1; - } - if role.is_heading() { - self.stats.headings += 1; - } - if is_interactive { - self.stats.actions += 1; - } - if matches!(role, SemanticRole::Form) { - self.stats.forms += 1; - } - if matches!(role, SemanticRole::Image) { - self.stats.images += 1; - } - - let children = if is_interactive { - // For interactive elements, don't recurse 
deeply - Vec::new() - } else { - node.children() - .filter_map(|child| self.build_node(child)) - .collect() - }; - - return Some(SemanticNode { - role, - name, - tag, - is_interactive, - is_disabled, - href, - action, - children, - }); - } - - if let scraper::Node::Text(text) = node.value() { - let content = text.trim(); - if !content.is_empty() { - return Some(SemanticNode { - role: SemanticRole::StaticText, - name: Some(content.to_string()), - tag: "#text".to_string(), - is_interactive: false, - is_disabled: false, - href: None, - action: None, - children: Vec::new(), - }); - } - } - - None - } -} - -fn infer_role(tag: &str, elem: &scraper::element_ref::Element) -> (SemanticRole, Option) { - // Check explicit ARIA role first - if let Some(role_attr) = elem.attr("role") { - let role = match role_attr { - "banner" => SemanticRole::Banner, - "navigation" => SemanticRole::Navigation, - "main" => SemanticRole::Main, - "contentinfo" => SemanticRole::ContentInfo, - "complementary" => SemanticRole::Complementary, - "region" => SemanticRole::Region, - "form" => SemanticRole::Form, - "search" => SemanticRole::Search, - "article" => SemanticRole::Article, - _ => SemanticRole::Other(role_attr.to_string()), - }; - return (role, infer_action(tag, elem)); - } - - // HTML5 semantic elements - let role = match tag { - "header" => SemanticRole::Banner, - "nav" => SemanticRole::Navigation, - "main" => SemanticRole::Main, - "footer" => SemanticRole::ContentInfo, - "aside" => SemanticRole::Complementary, - "section" => SemanticRole::Region, - "form" => SemanticRole::Form, - "article" => SemanticRole::Article, - "h1" => SemanticRole::Heading { level: 1 }, - "h2" => SemanticRole::Heading { level: 2 }, - "h3" => SemanticRole::Heading { level: 3 }, - "h4" => SemanticRole::Heading { level: 4 }, - "h5" => SemanticRole::Heading { level: 5 }, - "h6" => SemanticRole::Heading { level: 6 }, - "a" => SemanticRole::Link, - "button" => SemanticRole::Button, - "input" => { - let input_type = 
elem.attr("type").unwrap_or("text"); - match input_type { - "checkbox" => SemanticRole::Checkbox, - "radio" => SemanticRole::Radio, - "submit" | "button" | "reset" => SemanticRole::Button, - _ => SemanticRole::TextBox, - } - } - "select" => SemanticRole::Combobox, - "textarea" => SemanticRole::TextBox, - "img" | "image" => SemanticRole::Image, - "ul" | "ol" => SemanticRole::List, - "li" => SemanticRole::ListItem, - "table" => SemanticRole::Table, - "tr" => SemanticRole::Row, - "td" => SemanticRole::Cell, - "th" => { - let scope = elem.attr("scope").unwrap_or(""); - if scope == "row" || scope == "rowgroup" { - SemanticRole::RowHeader - } else { - SemanticRole::ColumnHeader - } - } - "dialog" => SemanticRole::Dialog, - _ => SemanticRole::Generic, - }; - - (role, infer_action(tag, elem)) -} - -fn infer_action(tag: &str, elem: &scraper::element_ref::Element) -> Option { - match tag { - "a" => Some("navigate".to_string()), - "button" => Some("click".to_string()), - "input" => { - let input_type = elem.attr("type").unwrap_or("text"); - match input_type { - "submit" => Some("click".to_string()), - "checkbox" | "radio" => Some("toggle".to_string()), - _ => Some("fill".to_string()), - } - } - "select" => Some("select".to_string()), - "textarea" => Some("fill".to_string()), - _ => None, - } -} - -fn is_interactive_element(tag: &str, elem: &scraper::element_ref::Element) -> bool { - matches!(tag, "a" | "button" | "select" | "textarea" | "input") - || elem.attr("onclick").is_some() - || elem.attr("role").is_some_and(|r| { - matches!(r, "button" | "link" | "checkbox" | "radio" | "textbox" | "combobox") - }) -} - -fn extract_name(elem: &scraper::element_ref::Element) -> Option { - // Try aria-label first - if let Some(label) = elem.attr("aria-label") { - if !label.is_empty() { - return Some(label.to_string()); - } - } - - // Try aria-labelledby (simplified: just return the ID, not the actual text) - if let Some(labelledby) = elem.attr("aria-labelledby") { - if 
!labelledby.is_empty() { - return Some(format!("labelled by: {}", labelledby)); - } - } - - // Try title attribute - if let Some(title) = elem.attr("title") { - if !title.is_empty() { - return Some(title.to_string()); - } - } - - // Try alt for images - if let Some(alt) = elem.attr("alt") { - if !alt.is_empty() { - return Some(alt.to_string()); - } - } - - // Try placeholder for inputs - if let Some(placeholder) = elem.attr("placeholder") { - if !placeholder.is_empty() { - return Some(placeholder.to_string()); - } - } - - // Try value for inputs - if let Some(value) = elem.attr("value") { - if !value.is_empty() { - return Some(value.to_string()); - } - } - - // Try name attribute - if let Some(name) = elem.attr("name") { - if !name.is_empty() { - return Some(name.to_string()); - } - } - - None -} - -fn resolve_url(base: &str, href: &str) -> String { - Url::parse(base) - .and_then(|b| b.join(href)) - .map(|u| u.to_string()) - .unwrap_or_else(|_| format!("{}{}", base.trim_end_matches('/'), href)) -} - -fn count_nodes(node: &SemanticNode) -> usize { - 1 + node.children.iter().map(count_nodes).sum::() -} diff --git a/crates/pardus-core/tests/browser_agent_test.rs b/crates/pardus-core/tests/browser_agent_test.rs deleted file mode 100644 index 88d4aa7..0000000 --- a/crates/pardus-core/tests/browser_agent_test.rs +++ /dev/null @@ -1,143 +0,0 @@ -//! Tests for browser agent (human-like browser headers) feature. -//! -//! Tests that BrowserAgentConfig correctly generates realistic browser headers -//! and that they are applied to HTTP requests. 
- -use pardus_core::{BrowserConfig, BrowserAgentConfig, RefererPolicy}; - -// --------------------------------------------------------------------------- -// BrowserAgentConfig Tests -// --------------------------------------------------------------------------- - -#[test] -fn test_browser_agent_config_default_disabled() { - let config = BrowserAgentConfig::default(); - assert!(!config.enabled); -} - -#[test] -fn test_chrome_macos_profile() { - let config = BrowserAgentConfig::chrome_macos(); - assert!(config.enabled); - assert!(config.user_agent.contains("Chrome")); - assert!(config.user_agent.contains("Macintosh")); - assert!(config.sec_fetch_headers); - assert!(!config.dnt); - assert!(config.keep_alive); -} - -#[test] -fn test_chrome_windows_profile() { - let config = BrowserAgentConfig::chrome_windows(); - assert!(config.enabled); - assert!(config.user_agent.contains("Chrome")); - assert!(config.user_agent.contains("Windows NT 10.0")); - assert!(config.sec_fetch_headers); -} - -#[test] -fn test_firefox_macos_profile() { - let config = BrowserAgentConfig::firefox_macos(); - assert!(config.enabled); - assert!(config.user_agent.contains("Firefox")); - assert!(config.user_agent.contains("Macintosh")); - assert!(!config.sec_fetch_headers); - assert!(config.dnt); -} - -#[test] -fn test_safari_macos_profile() { - let config = BrowserAgentConfig::safari_macos(); - assert!(config.enabled); - assert!(config.user_agent.contains("Safari")); - assert!(config.user_agent.contains("Version/17.1")); - assert!(config.sec_fetch_headers); -} - -#[test] -fn test_browser_agent_headers_generation() { - let config = BrowserAgentConfig::chrome_macos(); - let headers = config.to_headers(); - let header_map: std::collections::HashMap<\u0026str, String> = headers.into_iter().collect(); - assert!(header_map.contains_key("Accept")); - assert!(header_map.contains_key("Accept-Language")); - assert!(header_map.contains_key("Accept-Encoding")); - 
assert!(header_map.contains_key("Cache-Control")); - assert!(header_map.contains_key("Connection")); - assert!(header_map.contains_key("Sec-Fetch-Dest")); - assert!(header_map.contains_key("Sec-Fetch-Mode")); - assert!(header_map.contains_key("Sec-Fetch-Site")); - assert!(header_map.contains_key("Sec-Fetch-User")); - assert!(header_map.contains_key("Upgrade-Insecure-Requests")); -} - -#[test] -fn test_firefox_headers_no_sec_fetch() { - let config = BrowserAgentConfig::firefox_macos(); - let headers = config.to_headers(); - let header_map: std::collections::HashMap<\u0026str, String> = headers.into_iter().collect(); - assert!(!header_map.contains_key("Sec-Fetch-Dest")); - assert!(header_map.contains_key("DNT")); -} - -#[test] -fn test_browser_config_with_browser_agent() { - let agent_config = BrowserAgentConfig::chrome_macos(); - let browser_config = BrowserConfig::default() - .with_browser_agent(agent_config); - assert!(browser_config.browser_agent.enabled); - assert!(browser_config.browser_agent.user_agent.contains("Chrome")); -} - -#[test] -fn test_effective_user_agent_with_browser_agent() { - let agent_config = BrowserAgentConfig::chrome_macos(); - let browser_config = BrowserConfig::default() - .with_browser_agent(agent_config); - let ua = browser_config.effective_user_agent(); - assert!(ua.contains("Chrome")); - assert!(!ua.contains("PardusBrowser")); -} - -#[test] -fn test_effective_user_agent_without_browser_agent() { - let browser_config = BrowserConfig::default(); - let ua = browser_config.effective_user_agent(); - assert!(ua.contains("PardusBrowser")); -} - -#[test] -fn test_browser_agent_request_delay_range() { - let chrome = BrowserAgentConfig::chrome_macos(); - let firefox = BrowserAgentConfig::firefox_macos(); - let safari = BrowserAgentConfig::safari_macos(); - assert_eq!(chrome.request_delay_ms, (100, 500)); - assert_eq!(firefox.request_delay_ms, (150, 600)); - assert_eq!(safari.request_delay_ms, (200, 800)); -} - -#[test] -fn 
test_referer_policy_default() { - let config = BrowserAgentConfig::chrome_macos(); - assert!(matches!(config.referer_policy, RefererPolicy::Always)); -} - -#[test] -fn test_accept_header_content() { - let config = BrowserAgentConfig::chrome_macos(); - let headers = config.to_headers(); - let header_map: std::collections::HashMap<\u0026str, String> = headers.into_iter().collect(); - let accept = header_map.get("Accept").expect("Accept header should exist"); - assert!(accept.contains("text/html")); - assert!(accept.contains("application/xhtml+xml")); -} - -#[test] -fn test_accept_language_header() { - let chrome = BrowserAgentConfig::chrome_macos(); - let firefox = BrowserAgentConfig::firefox_macos(); - let chrome_headers: std::collections::HashMap<\u0026str, String> = chrome.to_headers().into_iter().collect(); - let firefox_headers: std::collections::HashMap<\u0026str, String> = firefox.to_headers().into_iter().collect(); - assert_eq!(chrome_headers.get("Accept-Language").unwrap(), "en-US,en;q=0.9"); - assert_eq!(firefox_headers.get("Accept-Language").unwrap(), "en-US,en;q=0.5"); -} diff --git a/crates/pardus-core/tests/config_test.rs b/crates/pardus-core/tests/config_test.rs new file mode 100644 index 0000000..9d28aca --- /dev/null +++ b/crates/pardus-core/tests/config_test.rs @@ -0,0 +1,140 @@ +//! Tests for BrowserConfig defaults and sub-configs. +//! +//! Verifies that all configuration structs produce sensible defaults +//! and that builder methods chain correctly. 
+ +use pardus_core::BrowserConfig; + +// --------------------------------------------------------------------------- +// BrowserConfig defaults +// --------------------------------------------------------------------------- + +#[test] +fn test_default_user_agent_is_chrome() { + let config = BrowserConfig::default(); + let ua = config.effective_user_agent(); + assert!(ua.starts_with("Mozilla/5.0")); + assert!(ua.contains("Chrome/131")); + assert!(ua.contains("Safari/537.36")); +} + +#[test] +fn test_default_timeouts() { + let config = BrowserConfig::default(); + assert_eq!(config.timeout_ms, 10_000); + assert_eq!(config.wait_ms, 3_000); +} + +#[test] +fn test_default_viewport() { + let config = BrowserConfig::default(); + assert_eq!(config.viewport_width, 1280); + assert_eq!(config.viewport_height, 720); +} + +#[test] +fn test_default_iframe_settings() { + let config = BrowserConfig::default(); + assert!(config.parse_iframes); + assert_eq!(config.max_iframe_depth, 5); +} + +#[test] +fn test_default_limits() { + let config = BrowserConfig::default(); + assert_eq!(config.max_upload_size, 50 * 1024 * 1024); + assert_eq!(config.max_redirects, 10); +} + +#[test] +fn test_default_screenshot_options() { + let config = BrowserConfig::default(); + assert!(config.screenshot_endpoint.is_none()); + assert_eq!(config.screenshot_timeout_ms, 10_000); + assert!(config.screenshot_chrome_path.is_none()); +} + +// --------------------------------------------------------------------------- +// ConnectionPoolConfig defaults +// --------------------------------------------------------------------------- + +#[test] +fn test_default_connection_pool() { + let config = BrowserConfig::default(); + assert_eq!(config.connection_pool.max_idle_per_host, 32); + assert_eq!(config.connection_pool.idle_timeout_secs, 90); + assert_eq!(config.connection_pool.tcp_keepalive_secs, 60); + assert!(config.connection_pool.enable_http2); +} + +// 
--------------------------------------------------------------------------- +// PushConfig defaults +// --------------------------------------------------------------------------- + +#[test] +fn test_default_push_config() { + let config = BrowserConfig::default(); + assert!(config.push.enable_push); + assert_eq!(config.push.max_push_resources, 32); + assert_eq!(config.push.push_cache_ttl_secs, 30); +} + +// --------------------------------------------------------------------------- +// RetryConfig defaults +// --------------------------------------------------------------------------- + +#[test] +fn test_default_retry_config() { + let config = BrowserConfig::default(); + let retry = &config.retry; + assert_eq!(retry.max_retries, 0); // disabled by default + assert_eq!(retry.initial_backoff_ms, 100); + assert_eq!(retry.max_backoff_ms, 10_000); + assert!((retry.backoff_factor - 2.0).abs() < f64::EPSILON); + assert_eq!( + retry.retry_on_statuses, + vec![408, 429, 500, 502, 503, 504] + ); +} + +// --------------------------------------------------------------------------- +// CspConfig defaults +// --------------------------------------------------------------------------- + +#[test] +fn test_default_csp_config() { + let config = BrowserConfig::default(); + assert!(!config.csp.enforce_csp); + assert!(config.csp.log_report_only); + assert!(config.csp.override_policy.is_none()); +} + +// --------------------------------------------------------------------------- +// ProxyConfig defaults +// --------------------------------------------------------------------------- + +#[test] +fn test_default_proxy_config() { + let config = BrowserConfig::default(); + assert!(config.proxy.http_proxy.is_none()); + assert!(config.proxy.https_proxy.is_none()); + assert!(config.proxy.all_proxy.is_none()); + assert!(config.proxy.no_proxy.is_none()); +} + +// --------------------------------------------------------------------------- +// User-agent override +// 
--------------------------------------------------------------------------- + +#[test] +fn test_custom_user_agent() { + let mut config = BrowserConfig::default(); + config.user_agent = "CustomBot/1.0".to_string(); + assert_eq!(config.effective_user_agent(), "CustomBot/1.0"); +} + +#[test] +fn test_dedup_window_default_disabled() { + let config = BrowserConfig::default(); + assert_eq!(config.dedup_window_ms, 0); +} diff --git a/crates/pardus-core/tests/output_formatter_test.rs b/crates/pardus-core/tests/output_formatter_test.rs new file mode 100644 index 0000000..16778d9 --- /dev/null +++ b/crates/pardus-core/tests/output_formatter_test.rs @@ -0,0 +1,231 @@ +//! Tests for output formatters: format_tree, format_md, format_llm, format_json. + +use pardus_core::{RedirectChain, SemanticNode, SemanticRole, SemanticTree, TreeStats}; +use scraper::Html; + +fn tree_from(html: &str) -> SemanticTree { + let parsed = Html::parse_document(html); + SemanticTree::build(&parsed, "https://example.com") +} + +fn simple_tree() -> SemanticTree { + tree_from(r#" +

Title

+ +
+

Hello world

+
+ + +
+
+ "#) +} + +// --------------------------------------------------------------------------- +// format_tree — unicode tree output +// --------------------------------------------------------------------------- + +#[test] +fn test_format_tree_produces_output() { + let tree = simple_tree(); + let output = pardus_core::format_tree(&tree); + assert!(!output.is_empty()); +} + +#[test] +fn test_format_tree_has_tree_chars() { + let tree = simple_tree(); + let output = pardus_core::format_tree(&tree); + assert!( + output.contains("├") || output.contains("└"), + "tree output should contain tree branch characters" + ); +} + +#[test] +fn test_format_tree_shows_roles() { + let tree = tree_from(""); + let output = pardus_core::format_tree(&tree); + assert!( + output.contains("navigation") || output.contains("nav"), + "tree output should mention navigation role" + ); +} + +// --------------------------------------------------------------------------- +// format_md — markdown-style output +// --------------------------------------------------------------------------- + +#[test] +fn test_format_md_produces_output() { + let tree = simple_tree(); + let output = pardus_core::output::md_formatter::format_md(&tree); + assert!(!output.is_empty()); +} + +#[test] +fn test_format_md_starts_with_document() { + let tree = simple_tree(); + let output = pardus_core::output::md_formatter::format_md(&tree); + assert!( + output.starts_with("document"), + "MD output should start with 'document'" + ); +} + +#[test] +fn test_format_md_shows_links() { + let tree = tree_from(r#"Link"#); + let output = pardus_core::output::md_formatter::format_md(&tree); + assert!( + output.contains("link") || output.contains("/page"), + "MD output should contain link info" + ); +} + +// --------------------------------------------------------------------------- +// format_llm — LLM-optimized output +// --------------------------------------------------------------------------- + +#[test] +fn 
test_format_llm_produces_output() { + let tree = simple_tree(); + let output = pardus_core::format_llm(&tree); + assert!(!output.is_empty()); +} + +#[test] +fn test_format_llm_compact_format() { + let tree = tree_from(r#" + Link A + + "#); + let output = pardus_core::format_llm(&tree); + // LLM format uses single-char tags and compact notation + assert!(output.len() > 0); +} + +#[test] +fn test_format_llm_lists_actions() { + let tree = tree_from(r#" + + Go + "#); + let output = pardus_core::format_llm(&tree); + assert!(!output.is_empty()); +} + +// --------------------------------------------------------------------------- +// format_json — structured JSON output +// --------------------------------------------------------------------------- + +#[test] +fn test_format_json_produces_valid_json() { + let tree = simple_tree(); + let output = pardus_core::output::json_formatter::format_json( + "https://example.com", + Some("Test Page".to_string()), + &tree, + None, + None, + None as Option<&RedirectChain>, + ) + .expect("format_json should succeed"); + + let parsed: serde_json::Value = serde_json::from_str(&output).expect("output should be valid JSON"); + assert!(parsed.get("url").is_some() || parsed.get("semantic_tree").is_some()); +} + +#[test] +fn test_format_json_includes_url() { + let tree = simple_tree(); + let output = pardus_core::output::json_formatter::format_json( + "https://example.com/page", + None, + &tree, + None, + None, + None, + ) + .expect("format_json should succeed"); + + assert!(output.contains("https://example.com/page")); +} + +#[test] +fn test_format_json_includes_stats() { + let tree = simple_tree(); + let output = pardus_core::output::json_formatter::format_json( + "https://example.com", + None, + &tree, + None, + None, + None, + ) + .expect("format_json should succeed"); + + assert!(output.contains("stats") || output.contains("landmarks") || output.contains("links")); +} + +// 
--------------------------------------------------------------------------- +// Empty tree edge case +// --------------------------------------------------------------------------- + +#[test] +fn test_format_empty_tree() { + let tree = SemanticTree { + root: SemanticNode { + role: SemanticRole::Document, + name: None, + tag: "document".to_string(), + is_interactive: false, + is_disabled: false, + href: None, + action: None, + element_id: None, + selector: None, + input_type: None, + placeholder: None, + is_required: false, + is_readonly: false, + current_value: None, + is_checked: false, + options: Vec::new(), + pattern: None, + min_length: None, + max_length: None, + min_val: None, + max_val: None, + step_val: None, + autocomplete: None, + accept: None, + multiple: false, + children: Vec::new(), + }, + stats: TreeStats::default(), + }; + + let tree_out = pardus_core::format_tree(&tree); + assert!(!tree_out.is_empty()); + + let md_out = pardus_core::output::md_formatter::format_md(&tree); + assert!(!md_out.is_empty()); + + let llm_out = pardus_core::format_llm(&tree); + // LLM format might be empty for an empty tree, that's OK + assert!(llm_out.len() >= 0); + + let json_out = pardus_core::output::json_formatter::format_json( + "https://example.com", + None, + &tree, + None, + None, + None, + ) + .expect("format_json should work on empty tree"); + let parsed: serde_json::Value = serde_json::from_str(&json_out).unwrap(); + assert!(parsed.is_object()); +} diff --git a/crates/pardus-core/tests/redirect_chain_test.rs b/crates/pardus-core/tests/redirect_chain_test.rs new file mode 100644 index 0000000..57e5478 --- /dev/null +++ b/crates/pardus-core/tests/redirect_chain_test.rs @@ -0,0 +1,91 @@ +//! Tests for RedirectChain, RedirectHop, and PageSnapshot. 
+ +use pardus_core::{RedirectChain, RedirectHop}; + +// --------------------------------------------------------------------------- +// RedirectHop +// --------------------------------------------------------------------------- + +#[test] +fn test_redirect_hop_fields() { + let hop = RedirectHop { + from: "https://a.com".to_string(), + to: "https://b.com".to_string(), + status: 301, + }; + assert_eq!(hop.from, "https://a.com"); + assert_eq!(hop.to, "https://b.com"); + assert_eq!(hop.status, 301); +} + +// --------------------------------------------------------------------------- +// RedirectChain +// --------------------------------------------------------------------------- + +#[test] +fn test_empty_chain() { + let chain = RedirectChain::default(); + assert!(chain.is_empty()); + assert!(chain.hops.is_empty()); + assert!(chain.original_url().is_none()); +} + +#[test] +fn test_single_hop() { + let chain = RedirectChain { + hops: vec![RedirectHop { + from: "https://a.com".to_string(), + to: "https://b.com".to_string(), + status: 301, + }], + }; + assert!(!chain.is_empty()); + assert_eq!(chain.original_url(), Some("https://a.com")); + assert_eq!(chain.hops.len(), 1); +} + +#[test] +fn test_multi_hop_chain() { + let chain = RedirectChain { + hops: vec![ + RedirectHop { + from: "https://a.com".to_string(), + to: "https://b.com".to_string(), + status: 301, + }, + RedirectHop { + from: "https://b.com".to_string(), + to: "https://c.com".to_string(), + status: 302, + }, + RedirectHop { + from: "https://c.com".to_string(), + to: "https://d.com".to_string(), + status: 301, + }, + ], + }; + assert!(!chain.is_empty()); + assert_eq!(chain.original_url(), Some("https://a.com")); + assert_eq!(chain.hops.last().unwrap().to, "https://d.com"); + assert_eq!(chain.hops.len(), 3); +} + +#[test] +fn test_chain_serialization() { + let chain = RedirectChain { + hops: vec![RedirectHop { + from: "https://example.com".to_string(), + to: "https://example.com/new".to_string(), + status: 301, + 
}], + }; + let json = serde_json::to_string(&chain).expect("should serialize"); + assert!(json.contains("example.com")); + assert!(json.contains("301")); + + let deserialized: RedirectChain = + serde_json::from_str(&json).expect("should deserialize"); + assert_eq!(deserialized.hops.len(), 1); + assert_eq!(deserialized.hops[0].status, 301); +} diff --git a/crates/pardus-core/tests/session_store_test.rs b/crates/pardus-core/tests/session_store_test.rs new file mode 100644 index 0000000..679301c --- /dev/null +++ b/crates/pardus-core/tests/session_store_test.rs @@ -0,0 +1,185 @@ +//! Tests for SessionStore — ephemeral sessions, cookies, headers, localStorage. +//! +//! Verifies that ephemeral sessions work correctly, localStorage CRUD works, +//! and that header parsing produces expected values. + +use base64::Engine; +use pardus_core::SessionStore; +use std::path::PathBuf; + +fn tmp_dir() -> PathBuf { + let ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_millis(); + std::env::temp_dir().join(format!("pardus-test-session-{}", ms)) +} + +// --------------------------------------------------------------------------- +// Creation +// --------------------------------------------------------------------------- + +#[test] +fn test_ephemeral_session_create() { + let dir = tmp_dir(); + let store = SessionStore::ephemeral("test", &dir).expect("should create ephemeral"); + assert_eq!(store.session_name(), "test"); + assert_eq!(store.cookie_count(), 0); + assert_eq!(store.header_count(), 0); +} + +#[test] +fn test_persistent_session_create() { + let dir = tmp_dir(); + let store = SessionStore::load("persistent-test", &dir).expect("should create persistent"); + assert_eq!(store.session_name(), "persistent-test"); +} + +// --------------------------------------------------------------------------- +// Cookies +// --------------------------------------------------------------------------- + +#[test] +fn test_set_and_count_cookies() { + 
let dir = tmp_dir(); + let store = SessionStore::ephemeral("test", &dir).unwrap(); + assert_eq!(store.cookie_count(), 0); + + store.set_cookie("session", "abc123", "example.com", "/"); + assert_eq!(store.cookie_count(), 1); + store.set_cookie("theme", "dark", "example.com", "/"); + assert_eq!(store.cookie_count(), 2); +} + +#[test] +fn test_clear_cookies() { + let dir = tmp_dir(); + let store = SessionStore::ephemeral("test", &dir).unwrap(); + store.set_cookie("a", "1", "example.com", "/"); + store.set_cookie("b", "2", "example.com", "/"); + assert_eq!(store.cookie_count(), 2); + store.clear_cookies(); + assert_eq!(store.cookie_count(), 0); +} + +#[test] +fn test_delete_cookie() { + let dir = tmp_dir(); + let store = SessionStore::ephemeral("test", &dir).unwrap(); + store.set_cookie("keep", "1", "example.com", "/"); + store.set_cookie("remove", "2", "example.com", "/"); + let deleted = store.delete_cookie("remove", "example.com", "/"); + assert!(deleted); + assert_eq!(store.cookie_count(), 1); +} + +#[test] +fn test_all_cookies() { + let dir = tmp_dir(); + let store = SessionStore::ephemeral("test", &dir).unwrap(); + store.set_cookie("a", "1", "x.com", "/"); + store.set_cookie("b", "2", "y.com", "/"); + let cookies = store.all_cookies(); + assert_eq!(cookies.len(), 2); +} + +// --------------------------------------------------------------------------- +// Headers +// --------------------------------------------------------------------------- + +#[test] +fn test_add_and_count_headers() { + let dir = tmp_dir(); + let store = SessionStore::ephemeral("test", &dir).unwrap(); + assert_eq!(store.header_count(), 0); + store.add_header("Authorization", "Bearer token123"); + assert_eq!(store.header_count(), 1); + store.add_header("X-Custom", "value"); + assert_eq!(store.header_count(), 2); +} + +// --------------------------------------------------------------------------- +// localStorage — persistent session needed (ephemeral sets no_local_storage: true) +// 
--------------------------------------------------------------------------- + +#[test] +fn test_local_storage_crud() { + let dir = tmp_dir(); + let store = SessionStore::load("test", &dir).unwrap(); + assert!(store.local_storage_get("https://example.com", "key").is_none()); + store.local_storage_set("https://example.com", "key", "value"); + assert_eq!( + store.local_storage_get("https://example.com", "key"), + Some("value".to_string()) + ); + let keys = store.local_storage_keys("https://example.com"); + assert_eq!(keys, vec!["key"]); + store.local_storage_remove("https://example.com", "key"); + assert!(store.local_storage_get("https://example.com", "key").is_none()); +} + +#[test] +fn test_local_storage_origins() { + let dir = tmp_dir(); + let store = SessionStore::load("test", &dir).unwrap(); + store.local_storage_set("https://a.com", "k", "v"); + store.local_storage_set("https://b.com", "k", "v"); + let origins = store.local_storage_origins(); + assert_eq!(origins.len(), 2); + assert!(origins.contains(&"https://a.com".to_string())); + assert!(origins.contains(&"https://b.com".to_string())); +} + +#[test] +fn test_local_storage_clear() { + let dir = tmp_dir(); + let store = SessionStore::load("test", &dir).unwrap(); + store.local_storage_set("https://example.com", "a", "1"); + store.local_storage_set("https://example.com", "b", "2"); + store.local_storage_clear("https://example.com"); + assert!(store.local_storage_keys("https://example.com").is_empty()); +} + +// --------------------------------------------------------------------------- +// Auth header parsing +// --------------------------------------------------------------------------- + +#[test] +fn test_parse_bearer_auth() { + let result = SessionStore::parse_auth_header("bearer:abc123"); + assert_eq!( + result, + Some(("Authorization".to_string(), "Bearer abc123".to_string())) + ); +} + +#[test] +fn test_parse_basic_auth() { + // parse_auth_header expects "basic:user:pass" and base64-encodes it + let result 
= SessionStore::parse_auth_header("basic:user:pass"); + let expected_b64 = base64::engine::general_purpose::STANDARD.encode("user:pass"); + assert_eq!( + result, + Some(("Authorization".to_string(), format!("Basic {}", expected_b64))) + ); +} + +#[test] +fn test_parse_auth_invalid() { + assert!(SessionStore::parse_auth_header("invalid").is_none()); + assert!(SessionStore::parse_auth_header("").is_none()); +} + +#[test] +fn test_parse_custom_header() { + let result = SessionStore::parse_custom_header("X-API-Key: mykey123"); + assert_eq!( + result, + Some(("X-API-Key".to_string(), "mykey123".to_string())) + ); +} + +#[test] +fn test_parse_custom_header_no_colon() { + assert!(SessionStore::parse_custom_header("no-colon-here").is_none()); +} diff --git a/crates/pardus-core/tests/user_agent_test.rs b/crates/pardus-core/tests/user_agent_test.rs new file mode 100644 index 0000000..10efb59 --- /dev/null +++ b/crates/pardus-core/tests/user_agent_test.rs @@ -0,0 +1,163 @@ +//! Tests for the user-agent fix in build_http_client. +//! +//! Before the fix, `.default_headers()` was called AFTER `.user_agent()`, +//! but since rquest's `default_headers()` uses `std::mem::swap` (replacing all +//! headers), the User-Agent was silently lost. The fix reorders the calls so +//! `.user_agent()` is applied last. + +use pardus_core::BrowserConfig; + +/// The default User-Agent string set in BrowserConfig. 
+const EXPECTED_UA: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) \ + AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"; + +// --------------------------------------------------------------------------- +// Unit tests (no network) +// --------------------------------------------------------------------------- + +#[test] +fn test_config_default_user_agent_is_chrome() { + let config = BrowserConfig::default(); + let ua = config.effective_user_agent(); + assert!( + ua.contains("Chrome"), + "default user-agent should contain 'Chrome', got: {ua}" + ); + assert!( + ua.contains("Mozilla/5.0"), + "default user-agent should start with 'Mozilla/5.0', got: {ua}" + ); +} + +#[test] +fn test_config_custom_user_agent_is_preserved() { + let custom_ua = "TestBot/1.0"; + let mut config = BrowserConfig::default(); + config.user_agent = custom_ua.to_string(); + assert_eq!( + config.effective_user_agent(), + custom_ua, + "custom user-agent should be returned by effective_user_agent()" + ); +} + +// --------------------------------------------------------------------------- +// Integration tests (require network) +// --------------------------------------------------------------------------- + +/// Verify that the HTTP client built by `build_http_client` actually sends +/// the User-Agent header. Uses httpbin.org/headers which echoes request +/// headers back as JSON. +/// +/// This is the core regression test for the bug where `.default_headers()` +/// was wiping the User-Agent. 
+#[tokio::test] +async fn test_http_client_sends_user_agent() { + let config = BrowserConfig::default(); + let client = pardus_core::app::build_http_client(&config) + .expect("build_http_client should succeed"); + + let resp = client + .get("https://httpbin.org/headers") + .send() + .await + .expect("request to httpbin should succeed"); + + let body: serde_json::Value = resp + .json() + .await + .expect("response should be valid JSON"); + + let headers = body + .get("headers") + .expect("response should contain 'headers' object"); + + let ua = headers + .get("User-Agent") + .and_then(|v| v.as_str()) + .unwrap_or(""); + + assert!( + !ua.is_empty(), + "User-Agent header must not be empty — the build_http_client fix may have regressed" + ); + assert_eq!( + ua, EXPECTED_UA, + "User-Agent should match the default BrowserConfig value" + ); +} + +/// Verify that a custom user-agent is sent when configured. +#[tokio::test] +async fn test_http_client_sends_custom_user_agent() { + let custom_ua = "PardusTestBot/2.0 (Integration Test)"; + let mut config = BrowserConfig::default(); + config.user_agent = custom_ua.to_string(); + + let client = pardus_core::app::build_http_client(&config) + .expect("build_http_client should succeed"); + + let resp = client + .get("https://httpbin.org/headers") + .send() + .await + .expect("request to httpbin should succeed"); + + let body: serde_json::Value = resp + .json() + .await + .expect("response should be valid JSON"); + + let ua = body + .get("headers") + .and_then(|h| h.get("User-Agent")) + .and_then(|v| v.as_str()) + .unwrap_or(""); + + assert_eq!( + ua, custom_ua, + "custom User-Agent should be sent, got: {ua}" + ); +} + +/// Verify that Chrome-like sec-ch-ua headers are also present (not wiped +/// by the header ordering). 
+#[tokio::test] +async fn test_http_client_sends_sec_ch_ua_headers() { + let config = BrowserConfig::default(); + let client = pardus_core::app::build_http_client(&config) + .expect("build_http_client should succeed"); + + let resp = client + .get("https://httpbin.org/headers") + .send() + .await + .expect("request to httpbin should succeed"); + + let body: serde_json::Value = resp + .json() + .await + .expect("response should be valid JSON"); + + let headers = body.get("headers").expect("should contain headers"); + + let sec_ch_ua = headers + .get("Sec-Ch-Ua") + .and_then(|v| v.as_str()) + .unwrap_or(""); + + assert!( + sec_ch_ua.contains("Chrome"), + "sec-ch-ua should mention Chrome, got: {sec_ch_ua}" + ); + + let sec_fetch_dest = headers + .get("Sec-Fetch-Dest") + .and_then(|v| v.as_str()) + .unwrap_or(""); + + assert_eq!( + sec_fetch_dest, "document", + "sec-fetch-dest should be 'document', got: {sec_fetch_dest}" + ); +} diff --git a/crates/pardus-kg/Cargo.toml b/crates/pardus-kg/Cargo.toml index cff5e1a..1281027 100644 --- a/crates/pardus-kg/Cargo.toml +++ b/crates/pardus-kg/Cargo.toml @@ -9,6 +9,7 @@ pardus-debug = { path = "../pardus-debug" } serde = { workspace = true, features = ["derive"] } serde_json = { workspace = true } tokio = { workspace = true } +futures = "0.3" anyhow = { workspace = true } tracing = { workspace = true } url = { workspace = true } diff --git a/crates/pardus-kg/src/config.rs b/crates/pardus-kg/src/config.rs index 6e17507..639e6a3 100644 --- a/crates/pardus-kg/src/config.rs +++ b/crates/pardus-kg/src/config.rs @@ -10,14 +10,16 @@ pub struct CrawlConfig { pub max_pages: usize, /// Polite delay between requests in milliseconds. pub delay_ms: u64, - /// Whether to verify transitions by following them. - pub verify_transitions: bool, + /// Maximum concurrent page fetches. + pub concurrency: usize, /// Whether to discover pagination transitions. pub discover_pagination: bool, /// Whether to discover hash navigation transitions. 
pub discover_hash_nav: bool, /// Whether to discover form submission transitions. pub discover_forms: bool, + /// Whether to store full semantic trees in view states. + pub store_full_trees: bool, /// Proxy configuration for HTTP traffic. pub proxy: ProxyConfig, } @@ -28,10 +30,11 @@ impl Default for CrawlConfig { max_depth: 3, max_pages: 50, delay_ms: 200, - verify_transitions: true, + concurrency: 4, discover_pagination: true, discover_hash_nav: true, discover_forms: false, + store_full_trees: true, proxy: ProxyConfig::default(), } } diff --git a/crates/pardus-kg/src/crawler.rs b/crates/pardus-kg/src/crawler.rs index 65a07f4..560ac4f 100644 --- a/crates/pardus-kg/src/crawler.rs +++ b/crates/pardus-kg/src/crawler.rs @@ -1,14 +1,18 @@ use std::collections::{HashSet, VecDeque}; use std::sync::Arc; -use std::time::Instant; +use std::time::{Duration, Instant}; use anyhow::Result; -use tracing::{info, warn, debug}; +use futures::stream::{FuturesUnordered, StreamExt}; +use tokio::sync::Semaphore; +use tracing::{debug, info, warn}; use url::Url; use pardus_core::app::App; use pardus_core::config::BrowserConfig; +use pardus_core::navigation::graph::NavigationGraph; use pardus_core::page::Page; +use pardus_core::page_analysis::PageAnalysis; use crate::config::CrawlConfig; use crate::discovery::{self, DiscoveredTransition}; @@ -17,20 +21,24 @@ use crate::graph::KnowledgeGraph; use crate::state::{ViewState, ViewStateId}; use crate::transition::{Transition, TransitionOutcome, Trigger}; -/// A queued entry in the BFS frontier. struct FrontierEntry { url: String, depth: usize, parent_id: Option, trigger: Option, + retries: u8, +} + +struct ProcessedPage { + entry: FrontierEntry, + state_id: ViewStateId, + discovered: Vec, } -/// Crawl a site and build its Knowledge Graph. pub async fn crawl(root_url: &str, config: &CrawlConfig) -> Result { crawl_with_config(root_url, config).await } -/// Crawl a site with explicit configuration. 
pub async fn crawl_with_config(root_url: &str, config: &CrawlConfig) -> Result { let start = Instant::now(); @@ -43,118 +51,84 @@ pub async fn crawl_with_config(root_url: &str, config: &CrawlConfig) -> Result = VecDeque::new(); frontier.push_back(FrontierEntry { url: root_url.to_string(), depth: 0, parent_id: None, trigger: None, + retries: 0, }); - // Track normalized URLs already enqueued to avoid re-enqueuing let mut url_seen: HashSet = HashSet::new(); url_seen.insert(normalize_url(root_url)); + let semaphore = Arc::new(Semaphore::new(config.concurrency)); let mut pages_crawled = 0usize; let mut max_depth_reached = 0usize; - while let Some(entry) = frontier.pop_front() { - // Check limits + while !frontier.is_empty() { if pages_crawled >= config.max_pages { debug!("Max pages reached ({})", config.max_pages); break; } - if entry.depth > config.max_depth { - continue; - } - // Polite delay - if pages_crawled > 0 { - tokio::time::sleep(std::time::Duration::from_millis(config.delay_ms)).await; - } + let batch_size = frontier.len().min(config.concurrency); + let batch: Vec = frontier.drain(..batch_size).collect(); + + let mut in_flight = FuturesUnordered::new(); + let mut batch_reserved = 0usize; - // Fetch page - info!(url = %entry.url, depth = entry.depth, "Fetching page"); - let page = match Page::from_url(&app, &entry.url).await { - Ok(p) => p, - Err(e) => { - warn!(url = %entry.url, error = %e, "Failed to fetch page"); + for entry in batch { + if entry.depth > config.max_depth { continue; } - }; - pages_crawled += 1; - if entry.depth > max_depth_reached { - max_depth_reached = entry.depth; - } - - // Build fingerprint and ViewStateId - let tree = page.semantic_tree(); - let nav_graph = page.navigation_graph(); - let resource_urls = discover_resources(&page.html, &page.base_url); - let (fingerprint, state_id) = compute_fingerprint(&page.url, &tree, &resource_urls); - - // Record incoming transition - if let Some(ref parent_id) = entry.parent_id { - if let 
Some(ref trigger) = entry.trigger { - graph.add_transition(Transition { - from: parent_id.clone(), - to: state_id.clone(), - trigger: trigger.clone(), - verified: true, - outcome: Some(TransitionOutcome { - status: page.status, - final_url: page.url.clone(), - matched_prediction: true, - }), - }); + if pages_crawled + batch_reserved >= config.max_pages { + break; } - } - // Dedup by ViewStateId - if graph.has_state(&state_id.0) { - debug!(id = %state_id.0, "State already known, skipping discovery"); - continue; + let app = Arc::clone(&app); + let sem = semaphore.clone(); + let delay = if pages_crawled + batch_reserved > 0 { + Some(Duration::from_millis(config.delay_ms)) + } else { + None + }; + + batch_reserved += 1; + + in_flight.push(async move { + if let Some(dur) = delay { + tokio::time::sleep(dur).await; + } + let _permit = sem.acquire().await; + let page = Page::from_url(&app, &entry.url).await; + (entry, page) + }); } - // Build and record ViewState - let view_state = ViewState { - id: state_id.clone(), - url: page.url.clone(), - fragment: fingerprint.fragment.clone(), - fingerprint, - semantic_tree: tree, - navigation_graph: nav_graph, - resource_urls, - title: page.title(), - status: page.status, - }; - - info!(id = %state_id.0, url = %view_state.url, "New view-state discovered"); - graph.add_state(view_state); - - // Discover outgoing transitions if not at max depth - if entry.depth < config.max_depth { - let discovered = discover_transitions_for_page( - &graph, - &app, - &page, - &state_id, - &root_origin, - config, - ); - - for dt in discovered { - let normalized = normalize_url_for_frontier(&dt.target_url, &root_origin); - if url_seen.insert(normalized) { - frontier.push_back(FrontierEntry { - url: dt.target_url, - depth: entry.depth + 1, - parent_id: Some(state_id.clone()), - trigger: Some(dt.trigger), - }); + while let Some((entry, page_result)) = in_flight.next().await { + let page = match page_result { + Ok(p) => p, + Err(e) => { + warn!(url = 
%entry.url, error = %e, "Failed to fetch page"); + if entry.retries < 2 { + frontier.push_back(FrontierEntry { + retries: entry.retries + 1, + ..entry + }); + } + continue; } + }; + + pages_crawled += 1; + if entry.depth > max_depth_reached { + max_depth_reached = entry.depth; } + + let processed = process_page(entry, &page, &mut graph, &root_origin, config); + enqueue_transitions(processed, &mut frontier, &mut url_seen, &root_origin); } } @@ -172,10 +146,104 @@ pub async fn crawl_with_config(root_url: &str, config: &CrawlConfig) -> Result Option { + let analysis = PageAnalysis::build(&page.html, &page.url); + let resource_urls = discover_resources(&page.html, &page.base_url); + let (fingerprint, state_id) = compute_fingerprint(&page.url, &analysis.semantic_tree, &resource_urls); + + if let Some(ref parent_id) = entry.parent_id { + if let Some(ref trigger) = entry.trigger { + graph.add_transition(Transition { + from: parent_id.clone(), + to: state_id.clone(), + trigger: trigger.clone(), + verified: true, + outcome: Some(TransitionOutcome { + status: page.status, + final_url: page.url.clone(), + matched_prediction: true, + }), + }); + } + } + + if graph.has_state(&state_id) { + debug!(id = %state_id.0, "State already known, skipping discovery"); + return None; + } + + let (semantic_tree, navigation_graph) = if config.store_full_trees { + (Some(analysis.semantic_tree), Some(analysis.navigation_graph.clone())) + } else { + (None, None) + }; + + let view_state = ViewState { + id: state_id.clone(), + url: page.url.clone(), + fragment: fingerprint.fragment.clone(), + fingerprint, + semantic_tree, + navigation_graph, + resource_urls, + title: page.title(), + status: page.status, + }; + + info!(id = %state_id.0, url = %view_state.url, "New view-state discovered"); + graph.add_state(view_state); + + if entry.depth < config.max_depth { + let discovered = discover_transitions_for_page( + &analysis.navigation_graph, + page, + &state_id, + root_origin, + config, + ); + 
Some(ProcessedPage { + entry, + state_id, + discovered, + }) + } else { + None + } +} + +fn enqueue_transitions( + processed: Option, + frontier: &mut VecDeque, + url_seen: &mut HashSet, + root_origin: &str, +) { + let Some(processed) = processed else { return }; + + for dt in processed.discovered { + if !is_same_origin(&dt.target_url, root_origin) { + continue; + } + let normalized = normalize_url(&dt.target_url); + if url_seen.insert(normalized) { + frontier.push_back(FrontierEntry { + url: dt.target_url, + depth: processed.entry.depth + 1, + parent_id: Some(processed.state_id.clone()), + trigger: Some(dt.trigger), + retries: 0, + }); + } + } +} + fn discover_transitions_for_page( - _graph: &KnowledgeGraph, - _app: &Arc, + nav_graph: &NavigationGraph, page: &Page, state_id: &ViewStateId, root_origin: &str, @@ -183,25 +251,20 @@ fn discover_transitions_for_page( ) -> Vec { let mut all = Vec::new(); - // 1. Link transitions - let nav_graph = page.navigation_graph(); all.extend(discovery::discover_link_transitions( - &nav_graph, root_origin, state_id, + nav_graph, root_origin, state_id, )); - // 2. Hash navigation if config.discover_hash_nav { let hash_transitions = discovery::discover_hash_transitions(&page.html, &page.url); all.extend(hash_transitions); } - // 3. Pagination if config.discover_pagination { let pagination_transitions = discovery::discover_pagination_transitions(&page.url); all.extend(pagination_transitions); } - // 4. Forms (optional — predicted, unverified) if config.discover_forms { for form in &nav_graph.forms { let action_url = form.action.clone().unwrap_or_default(); @@ -220,12 +283,25 @@ fn discover_transitions_for_page( all } -/// Normalize a URL for dedup: lowercase, strip fragment, sort query params, strip trailing slash. 
fn normalize_url(url: &str) -> String { let Ok(mut parsed) = Url::parse(url) else { return url.to_lowercase(); }; parsed.set_fragment(None); + + let mut pairs: Vec<(String, String)> = parsed + .query_pairs() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(); + pairs.sort_by(|a, b| a.0.cmp(&b.0)); + { + let mut q = parsed.query_pairs_mut(); + q.clear(); + for (k, v) in &pairs { + q.append_pair(k, v); + } + } + let mut result = parsed.to_string(); if result.ends_with('/') && !result.ends_with("://") { result.pop(); @@ -233,7 +309,8 @@ fn normalize_url(url: &str) -> String { result } -/// Normalize a URL for frontier dedup: strip fragment, same-origin check. -fn normalize_url_for_frontier(url: &str, _root_origin: &str) -> String { - normalize_url(url) +fn is_same_origin(url_str: &str, root_origin: &str) -> bool { + url::Url::parse(url_str) + .map(|u| u.origin().ascii_serialization() == root_origin) + .unwrap_or(false) } diff --git a/crates/pardus-kg/src/discovery.rs b/crates/pardus-kg/src/discovery.rs index 4c06c8a..11a6db7 100644 --- a/crates/pardus-kg/src/discovery.rs +++ b/crates/pardus-kg/src/discovery.rs @@ -17,13 +17,12 @@ pub struct DiscoveredTransition { /// Discover all link-click transitions from a navigation graph. pub fn discover_link_transitions( nav_graph: &NavigationGraph, - root_origin: &str, + _root_origin: &str, _parent_id: &ViewStateId, ) -> Vec { nav_graph .internal_links .iter() - .filter(|route| is_same_origin(&route.url, root_origin)) .map(|route| { let selector = format!("a[href=\"{}\"]", route.url); DiscoveredTransition { @@ -40,10 +39,7 @@ pub fn discover_link_transitions( /// Discover hash navigation transitions from page HTML. /// NavigationGraph skips href="#" links, so we need our own selector. 
-pub fn discover_hash_transitions( - html: &Html, - page_url: &str, -) -> Vec { +pub fn discover_hash_transitions(html: &Html, page_url: &str) -> Vec { static HASH_LINK: Lazy = Lazy::new(|| Selector::parse("a[href^='#']").expect("valid selector")); @@ -60,7 +56,11 @@ pub fn discover_hash_transitions( let label: String = el.text().collect::>().join(" ").trim().to_string(); // For hash nav, the target URL is the same page with the fragment let target_url = if page_url.contains('#') { - format!("{}#{}", page_url.split('#').next().unwrap_or(page_url), fragment) + format!( + "{}#{}", + page_url.split('#').next().unwrap_or(page_url), + fragment + ) } else { format!("{}#{}", page_url, fragment) }; @@ -86,7 +86,11 @@ pub fn discover_pagination_transitions(page_url: &str) -> Vec() { if page_num > 0 { let next_page = page_num + 1; @@ -103,7 +107,11 @@ pub fn discover_pagination_transitions(page_url: &str) -> Vec() { let page_size = detect_page_size(&url).unwrap_or(20); let next_offset = offset + page_size; @@ -119,7 +127,11 @@ pub fn discover_pagination_transitions(page_url: &str) -> Vec() { let step = detect_page_size(&url).unwrap_or(10); let next_start = start + step; @@ -136,14 +148,18 @@ pub fn discover_pagination_transitions(page_url: &str) -> Vec = path.split('/').filter(|s| !s.is_empty()).collect(); + let segments: Vec = path + .split('/') + .filter(|s| !s.is_empty()) + .map(|s| s.to_string()) + .collect(); for i in (1..segments.len()).rev() { let prev = segments[i - 1].to_lowercase(); if (prev == "page" || prev == "p") && segments[i].parse::().is_ok() { if let Ok(page_num) = segments[i].parse::() { let next_page = page_num + 1; let mut new_segments = segments.clone(); - new_segments[i] = Box::leak(next_page.to_string().into_boxed_str()); + new_segments[i] = next_page.to_string(); let new_path = format!("/{}", new_segments.join("/")); url.set_path(&new_path); results.push(DiscoveredTransition { @@ -161,13 +177,6 @@ pub fn discover_pagination_transitions(page_url: 
&str) -> Vec bool { - url::Url::parse(url_str) - .map(|u| u.origin().ascii_serialization() == root_origin) - .unwrap_or(false) -} - /// Detect page size from query params. fn detect_page_size(url: &url::Url) -> Option { url.query_pairs() diff --git a/crates/pardus-kg/src/fingerprint.rs b/crates/pardus-kg/src/fingerprint.rs index d63e9a5..059ea94 100644 --- a/crates/pardus-kg/src/fingerprint.rs +++ b/crates/pardus-kg/src/fingerprint.rs @@ -1,7 +1,6 @@ -use std::collections::BTreeMap; -use std::collections::BTreeSet; +use std::collections::{BTreeMap, HashSet}; -use pardus_core::{SemanticNode, SemanticRole, SemanticTree}; +use pardus_core::{SemanticNode, SemanticTree}; use scraper::Html; use url::Url; @@ -11,7 +10,7 @@ use crate::state::{Fingerprint, ViewStateId}; pub fn compute_fingerprint( page_url: &str, tree: &SemanticTree, - resource_urls: &BTreeSet, + resource_urls: &HashSet, ) -> (Fingerprint, ViewStateId) { let parsed = Url::parse(page_url).ok(); let url_path = parsed @@ -40,7 +39,7 @@ pub fn compute_fingerprint( } /// Discover subresource URLs from HTML. -pub fn discover_resources(html: &Html, base_url: &str) -> BTreeSet { +pub fn discover_resources(html: &Html, base_url: &str) -> HashSet { let records = pardus_debug::discover::discover_subresources(html, base_url, 0); records.into_iter().map(|r| r.url).collect() } @@ -66,90 +65,56 @@ fn extract_content_params(url: Option<&Url>) -> BTreeMap { /// For each node: "{role}:{tag}:{is_interactive}:{children_count}" /// Does NOT include name, href, action, or text content. 
fn hash_tree_structure(tree: &SemanticTree) -> String { - let mut skeleton = String::new(); - walk_skeleton(&tree.root, &mut skeleton); - let hash = blake3::hash(skeleton.as_bytes()); - hash.to_hex().to_string() + let mut hasher = blake3::Hasher::new(); + walk_skeleton_hash(&tree.root, &mut hasher); + hasher.finalize().to_hex().to_string() } -fn walk_skeleton(node: &SemanticNode, out: &mut String) { - out.push_str(&format!( +fn walk_skeleton_hash(node: &SemanticNode, hasher: &mut blake3::Hasher) { + let buf = format!( "{}:{}:{}:{}\n", - role_str(&node.role), + node.role.role_str(), node.tag, node.is_interactive, node.children.len() - )); + ); + hasher.update(buf.as_bytes()); for child in &node.children { - walk_skeleton(child, out); + walk_skeleton_hash(child, hasher); } } -fn role_str(role: &SemanticRole) -> String { - match role { - SemanticRole::Document => "document".to_string(), - SemanticRole::Banner => "banner".to_string(), - SemanticRole::Navigation => "navigation".to_string(), - SemanticRole::Main => "main".to_string(), - SemanticRole::ContentInfo => "contentinfo".to_string(), - SemanticRole::Complementary => "complementary".to_string(), - SemanticRole::Region => "region".to_string(), - SemanticRole::Form => "form".to_string(), - SemanticRole::Search => "search".to_string(), - SemanticRole::Article => "article".to_string(), - SemanticRole::Heading { .. 
} => "heading".to_string(), - SemanticRole::Link => "link".to_string(), - SemanticRole::Button => "button".to_string(), - SemanticRole::TextBox => "textbox".to_string(), - SemanticRole::FileInput => "fileinput".to_string(), - SemanticRole::Checkbox => "checkbox".to_string(), - SemanticRole::Radio => "radio".to_string(), - SemanticRole::Combobox => "combobox".to_string(), - SemanticRole::List => "list".to_string(), - SemanticRole::ListItem => "listitem".to_string(), - SemanticRole::Table => "table".to_string(), - SemanticRole::Row => "row".to_string(), - SemanticRole::Cell => "cell".to_string(), - SemanticRole::ColumnHeader => "columnheader".to_string(), - SemanticRole::RowHeader => "rowheader".to_string(), - SemanticRole::Image => "img".to_string(), - SemanticRole::Dialog => "dialog".to_string(), - SemanticRole::IFrame => "iframe".to_string(), - SemanticRole::Generic => "generic".to_string(), - SemanticRole::StaticText => "text".to_string(), - SemanticRole::Other(s) => s.clone(), +/// Hash a sorted set of resource URLs using incremental hashing. +fn hash_resource_set(resources: &HashSet) -> String { + let mut sorted: Vec<&String> = resources.iter().collect(); + sorted.sort(); + let mut hasher = blake3::Hasher::new(); + for url in sorted { + hasher.update(url.as_bytes()); + hasher.update(b"\n"); } + hasher.finalize().to_hex().to_string() } -/// Hash a sorted set of resource URLs. -fn hash_resource_set(resources: &BTreeSet) -> String { - let concatenated: String = resources - .iter() - .map(|u| u.as_str()) - .collect::>() - .join("\n"); - let hash = blake3::hash(concatenated.as_bytes()); - hash.to_hex().to_string() -} - -/// Compute ViewStateId from fingerprint components. +/// Compute ViewStateId from fingerprint components using incremental hashing. 
fn compute_view_state_id(fp: &Fingerprint) -> ViewStateId { - let mut composite = String::new(); - composite.push_str(&fp.url_path); - composite.push('|'); + let mut hasher = blake3::Hasher::new(); + hasher.update(fp.url_path.as_bytes()); + hasher.update(b"|"); for (k, v) in &fp.content_query_params { - composite.push_str(&format!("{}={}", k, v)); + hasher.update(k.as_bytes()); + hasher.update(b"="); + hasher.update(v.as_bytes()); } - composite.push('|'); - composite.push_str(&fp.tree_hash); - composite.push('|'); - composite.push_str(&fp.resource_set_hash); + hasher.update(b"|"); + hasher.update(fp.tree_hash.as_bytes()); + hasher.update(b"|"); + hasher.update(fp.resource_set_hash.as_bytes()); if let Some(ref frag) = fp.fragment { - composite.push('|'); - composite.push_str(frag); + hasher.update(b"|"); + hasher.update(frag.as_bytes()); } - let hash = blake3::hash(composite.as_bytes()); - ViewStateId(hash.to_hex().to_string()) + ViewStateId(hasher.finalize().to_hex().to_string()) } #[cfg(test)] @@ -170,7 +135,6 @@ mod tests { let t2 = build_tree( r#"

World

"#, ); - // Same structure, different text → same hash assert_eq!(hash_tree_structure(&t1), hash_tree_structure(&t2)); } @@ -180,17 +144,16 @@ mod tests { let t2 = build_tree( r#""#, ); - // Different structure (1 link vs 2 links) assert_ne!(hash_tree_structure(&t1), hash_tree_structure(&t2)); } #[test] fn test_resource_set_hash_consistent() { - let mut set1 = BTreeSet::new(); + let mut set1 = HashSet::new(); set1.insert("https://example.com/a.css".to_string()); set1.insert("https://example.com/b.js".to_string()); - let mut set2 = BTreeSet::new(); + let mut set2 = HashSet::new(); set2.insert("https://example.com/b.js".to_string()); set2.insert("https://example.com/a.css".to_string()); @@ -200,7 +163,7 @@ mod tests { #[test] fn test_view_state_id_deterministic() { let tree = build_tree("

Test

"); - let mut resources = BTreeSet::new(); + let mut resources = HashSet::new(); resources.insert("https://example.com/style.css".to_string()); let (fp1, id1) = compute_fingerprint("https://example.com/", &tree, &resources); @@ -213,7 +176,7 @@ mod tests { #[test] fn test_different_urls_different_ids() { let tree = build_tree("

Test

"); - let resources = BTreeSet::new(); + let resources = HashSet::new(); let (_, id1) = compute_fingerprint("https://example.com/", &tree, &resources); let (_, id2) = compute_fingerprint("https://example.com/about", &tree, &resources); @@ -224,7 +187,7 @@ mod tests { #[test] fn test_fragment_creates_different_id() { let tree = build_tree("

Test

"); - let resources = BTreeSet::new(); + let resources = HashSet::new(); let (_, id1) = compute_fingerprint("https://example.com/#section1", &tree, &resources); let (_, id2) = compute_fingerprint("https://example.com/#section2", &tree, &resources); diff --git a/crates/pardus-kg/src/graph.rs b/crates/pardus-kg/src/graph.rs index 3bfc5d5..7236c7c 100644 --- a/crates/pardus-kg/src/graph.rs +++ b/crates/pardus-kg/src/graph.rs @@ -2,7 +2,7 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; use crate::config::CrawlConfig; -use crate::state::ViewState; +use crate::state::{ViewState, ViewStateId}; use crate::transition::Transition; /// The complete knowledge graph of a site. @@ -14,8 +14,8 @@ pub struct KnowledgeGraph { pub built_at: String, /// Crawl configuration used. pub config: CrawlConfig, - /// All view-states, keyed by ViewStateId string. - pub states: HashMap, + /// All view-states, keyed by ViewStateId. + pub states: HashMap, /// All transitions. pub transitions: Vec, /// Summary statistics. @@ -60,7 +60,8 @@ impl KnowledgeGraph { /// Add a view-state. Returns true if it was new. pub fn add_state(&mut self, state: ViewState) -> bool { - self.states.insert(state.id.0.clone(), state).is_none() + let id = state.id.clone(); + self.states.insert(id, state).is_none() } /// Add a transition. @@ -69,12 +70,17 @@ impl KnowledgeGraph { } /// Check if a ViewStateId is already known. - pub fn has_state(&self, id: &str) -> bool { + pub fn has_state(&self, id: &ViewStateId) -> bool { self.states.contains_key(id) } /// Compute final stats. 
- pub fn compute_stats(&mut self, max_depth_reached: usize, pages_crawled: usize, duration_ms: u128) { + pub fn compute_stats( + &mut self, + max_depth_reached: usize, + pages_crawled: usize, + duration_ms: u128, + ) { self.stats = KgStats { total_states: self.states.len(), total_transitions: self.transitions.len(), diff --git a/crates/pardus-kg/src/state.rs b/crates/pardus-kg/src/state.rs index 02b741c..5449b49 100644 --- a/crates/pardus-kg/src/state.rs +++ b/crates/pardus-kg/src/state.rs @@ -1,9 +1,8 @@ use serde::{Deserialize, Serialize}; -use std::collections::BTreeMap; -use std::collections::BTreeSet; +use std::collections::{BTreeMap, HashSet}; -use pardus_core::SemanticTree; use pardus_core::NavigationGraph; +use pardus_core::SemanticTree; /// Unique fingerprint identifying a distinct page state. #[derive(Debug, Clone, Hash, Eq, PartialEq, Serialize, Deserialize)] @@ -34,12 +33,14 @@ pub struct ViewState { pub fragment: Option, /// Fingerprint components. pub fingerprint: Fingerprint, - /// Semantic tree from pardus-core. - pub semantic_tree: SemanticTree, - /// Navigation graph from pardus-core. - pub navigation_graph: NavigationGraph, + /// Semantic tree from pardus-core (only when `store_full_trees` is enabled). + #[serde(skip_serializing_if = "Option::is_none")] + pub semantic_tree: Option, + /// Navigation graph from pardus-core (only when `store_full_trees` is enabled). + #[serde(skip_serializing_if = "Option::is_none")] + pub navigation_graph: Option, /// The set of subresource URLs loaded by this state. - pub resource_urls: BTreeSet, + pub resource_urls: HashSet, /// Page title. pub title: Option, /// HTTP status code. diff --git a/crates/pardus-tauri/frontend/index.html b/crates/pardus-tauri/frontend/index.html new file mode 100644 index 0000000..2fe6772 --- /dev/null +++ b/crates/pardus-tauri/frontend/index.html @@ -0,0 +1,15 @@ + + + + + + Pardus Mission Control + + + +
+ + + diff --git a/crates/pardus-tauri/frontend/package.json b/crates/pardus-tauri/frontend/package.json new file mode 100644 index 0000000..dd92c0c --- /dev/null +++ b/crates/pardus-tauri/frontend/package.json @@ -0,0 +1,28 @@ +{ + "name": "pardus-mission-control", + "private": true, + "version": "0.1.0", + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc -b && vite build", + "preview": "vite preview", + "lint": "eslint ." + }, + "dependencies": { + "@tauri-apps/api": "^2.0.0", + "react": "^19.0.0", + "react-dom": "^19.0.0" + }, + "devDependencies": { + "@types/react": "^19.0.0", + "@types/react-dom": "^19.0.0", + "@vitejs/plugin-react": "^4.0.0", + "eslint": "^9.0.0", + "@eslint/js": "^9.0.0", + "eslint-plugin-react-hooks": "^5.0.0", + "eslint-plugin-react-refresh": "^0.4.0", + "typescript": "~5.7.0", + "vite": "^6.0.0" + } +} diff --git a/crates/pardus-tauri/frontend/src/App.tsx b/crates/pardus-tauri/frontend/src/App.tsx new file mode 100644 index 0000000..97fd85b --- /dev/null +++ b/crates/pardus-tauri/frontend/src/App.tsx @@ -0,0 +1,42 @@ +import { AgentProvider, useAgent } from "./context/AgentContext"; +import { AgentSidebar } from "./components/AgentSidebar"; +import { InstanceHeader } from "./components/InstanceHeader"; +import { TreeViewer } from "./components/TreeViewer"; +import { ActionLog } from "./components/ActionLog"; +import { InteractionBar } from "./components/InteractionBar"; +import { ChallengePanel } from "./components/ChallengePanel"; + +function Dashboard() { + const { events } = useAgent(); + + return ( +
+ +
+ +
+
+
+ +
+
+ +
+
+ +
+ +
+
+ ); +} + +export function App() { + return ( + + + + ); +} diff --git a/crates/pardus-tauri/frontend/src/api/tauri.ts b/crates/pardus-tauri/frontend/src/api/tauri.ts new file mode 100644 index 0000000..341a5f6 --- /dev/null +++ b/crates/pardus-tauri/frontend/src/api/tauri.ts @@ -0,0 +1,164 @@ +import { invoke } from "@tauri-apps/api/core"; +import { listen } from "@tauri-apps/api/event"; +import type { + InstanceInfo, + SemanticNode, + TreeStats, + BridgeStatus, + AgentStatus, + CdpEvent, + StatusChange, +} from "../types"; + +// --------------------------------------------------------------------------- +// Instance management +// --------------------------------------------------------------------------- + +export async function listInstances(): Promise { + return invoke("list_instances"); +} + +export async function spawnInstance(): Promise { + return invoke("spawn_instance"); +} + +export async function killInstance(id: string): Promise { + return invoke("kill_instance", { id }); +} + +export async function killAllInstances(): Promise { + return invoke("kill_all_instances"); +} + +// --------------------------------------------------------------------------- +// CDP bridge +// --------------------------------------------------------------------------- + +export async function connectInstance(instanceId: string): Promise { + return invoke("connect_instance", { instanceId }); +} + +export async function disconnectInstance(instanceId: string): Promise { + return invoke("disconnect_instance", { instanceId }); +} + +export async function executeCdp( + instanceId: string, + method: string, + params: Record, +): Promise { + return invoke("execute_cdp", { instanceId, method, params }); +} + +export async function getSemanticTree( + instanceId: string, +): Promise<{ semanticTree: { root: SemanticNode; stats: TreeStats } }> { + return invoke("get_semantic_tree", { instanceId }); +} + +export async function getBridgeStatus( + instanceId: string, +): Promise { + return 
invoke("get_bridge_status", { instanceId }); +} + +export async function getInstanceEvents( + instanceId: string, + limit?: number, + since?: number, +): Promise<{ method: string; params: Record; timestamp: number }[]> { + return invoke("get_instance_events", { instanceId, limit: limit ?? 100, since }); +} + +// --------------------------------------------------------------------------- +// Agent status +// --------------------------------------------------------------------------- + +export async function setAgentStatus( + instanceId: string, + status: AgentStatus, +): Promise { + return invoke("set_agent_status", { instanceId, status }); +} + +// --------------------------------------------------------------------------- +// Browser windows +// --------------------------------------------------------------------------- + +export async function openBrowserWindow( + instanceId: string, + url?: string, +): Promise { + return invoke("open_browser_window", { instanceId, url }); +} + +export async function closeBrowserWindow(instanceId: string): Promise { + return invoke("close_browser_window", { instanceId }); +} + +// --------------------------------------------------------------------------- +// Event listeners +// --------------------------------------------------------------------------- + +type UnlistenFn = () => void; + +export function onCdpEvent( + handler: (event: CdpEvent) => void, +): Promise { + return listen("cdp-event", (e) => handler(e.payload)); +} + +export function onAgentStatusChanged( + handler: (event: StatusChange) => void, +): Promise { + return listen("agent-status-changed", (e) => handler(e.payload)); +} + +export async function openChallengeWindow( + url: string, + title?: string, +): Promise { + return invoke("open_challenge_window", { url, title }); +} + +export function onChallengeDetected( + handler: (info: { url: string; status: number; kinds: string[]; risk_score: number }) => void, +): Promise { + return listen<{ url: string; status: 
number; kinds: string[]; risk_score: number }>( + "challenge-detected", + (e) => handler(e.payload), + ); +} + +export function onChallengeSolved( + handler: (info: { url: string }) => void, +): Promise { + return listen<{ url: string }>("challenge-solved", (e) => handler(e.payload)); +} + +export function onChallengeFailed( + handler: (info: { challenge_url: string; reason: string }) => void, +): Promise { + return listen<{ challenge_url: string; reason: string }>( + "challenge-failed", + (e) => handler(e.payload), + ); +} + +export function onCdpBridgeConnected( + handler: (info: { instance_id: string; port: number }) => void, +): Promise { + return listen<{ instance_id: string; port: number }>( + "cdp-bridge-connected", + (e) => handler(e.payload), + ); +} + +export function onCdpBridgeDisconnected( + handler: (info: { instance_id: string; port: number }) => void, +): Promise { + return listen<{ instance_id: string; port: number }>( + "cdp-bridge-disconnected", + (e) => handler(e.payload), + ); +} diff --git a/crates/pardus-tauri/frontend/src/components/ActionLog.tsx b/crates/pardus-tauri/frontend/src/components/ActionLog.tsx new file mode 100644 index 0000000..db77df1 --- /dev/null +++ b/crates/pardus-tauri/frontend/src/components/ActionLog.tsx @@ -0,0 +1,137 @@ +import { useRef, useEffect, useState } from "react"; +import type { CdpEvent } from "../types"; + +interface ActionEntry { + id: string; + timestamp: number; + type: "navigate" | "action_start" | "action_complete" | "action_fail"; + summary: string; + detail?: string; +} + +function classifyEvent(event: CdpEvent): ActionEntry | null { + const { method, params, timestamp } = event; + + if (method === "Page.frameNavigated") { + const frame = (params as { frame?: { url?: string } })?.frame; + const url = frame?.url ?? 
"unknown"; + return { + id: `nav-${timestamp}`, + timestamp, + type: "navigate", + summary: "Navigate", + detail: url, + }; + } + + if (method === "Pardus.actionStarted") { + const p = params as { action?: string; target?: { selector?: string } }; + const action = p?.action ?? "unknown"; + const selector = p?.target?.selector ?? ""; + return { + id: `act-s-${timestamp}`, + timestamp, + type: "action_start", + summary: action.charAt(0).toUpperCase() + action.slice(1), + detail: selector, + }; + } + + if (method === "Pardus.actionCompleted") { + const p = params as { action?: string; result?: { note?: string } }; + const action = p?.action ?? "unknown"; + const note = p?.result?.note ?? ""; + return { + id: `act-c-${timestamp}`, + timestamp, + type: "action_complete", + summary: `${action} done`, + detail: note || undefined, + }; + } + + if (method === "Pardus.actionFailed") { + const p = params as { action?: string; result?: { error?: string } }; + const action = p?.action ?? "unknown"; + const error = p?.result?.error ?? 
"unknown error"; + return { + id: `act-f-${timestamp}`, + timestamp, + type: "action_fail", + summary: `${action} failed`, + detail: error, + }; + } + + return null; +} + +function formatTime(ts: number): string { + const d = new Date(ts); + return d.toLocaleTimeString("en-US", { hour12: false, hour: "2-digit", minute: "2-digit", second: "2-digit" }); +} + +const TYPE_ICONS: Record = { + navigate: "\u2192", + action_start: "\u25B6", + action_complete: "\u2713", + action_fail: "\u2717", +}; + +const TYPE_COLORS: Record = { + navigate: "var(--accent)", + action_start: "var(--cyan)", + action_complete: "var(--green)", + action_fail: "var(--red)", +}; + +export function ActionLog({ events }: { events: CdpEvent[] }) { + const scrollRef = useRef(null); + const [autoScroll, setAutoScroll] = useState(true); + + const entries: ActionEntry[] = events + .map(classifyEvent) + .filter((e): e is ActionEntry => e !== null); + + useEffect(() => { + if (autoScroll && scrollRef.current) { + scrollRef.current.scrollTop = scrollRef.current.scrollHeight; + } + }, [entries.length, autoScroll]); + + return ( +
+
+ Action Log + {entries.length} + +
+
+ {entries.length === 0 && ( +
No actions recorded yet.
+ )} + {entries.map((entry) => ( +
+ {formatTime(entry.timestamp)} + + {TYPE_ICONS[entry.type]} + + {entry.summary} + {entry.detail && ( + + {entry.detail} + + )} +
+ ))} +
+
+ ); +} diff --git a/crates/pardus-tauri/frontend/src/components/AgentSidebar.tsx b/crates/pardus-tauri/frontend/src/components/AgentSidebar.tsx new file mode 100644 index 0000000..01fa0e2 --- /dev/null +++ b/crates/pardus-tauri/frontend/src/components/AgentSidebar.tsx @@ -0,0 +1,133 @@ +import { useAgent } from "../context/AgentContext"; +import * as api from "../api/tauri"; +import { useState, useCallback } from "react"; + +const STATUS_COLORS: Record = { + idle: "var(--text-muted)", + connected: "var(--accent)", + running: "var(--green)", + paused: "var(--yellow)", + "waiting-challenge": "var(--orange)", + error: "var(--red)", +}; + +const STATUS_LABELS: Record = { + idle: "Idle", + connected: "Ready", + running: "Running", + paused: "Paused", + "waiting-challenge": "CAPTCHA", + error: "Error", +}; + +export function AgentSidebar() { + const { instances, selectedId, select, refreshInstances } = useAgent(); + const [spawning, setSpawning] = useState(false); + + const handleSpawn = useCallback(async () => { + setSpawning(true); + try { + const inst = await api.spawnInstance(); + await api.connectInstance(inst.id); + await refreshInstances(); + select(inst.id); + } catch (e) { + console.error("Failed to spawn:", e); + } finally { + setSpawning(false); + } + }, [refreshInstances, select]); + + const handleKill = useCallback( + async (id: string) => { + try { + await api.disconnectInstance(id); + await api.killInstance(id); + await refreshInstances(); + if (selectedId === id) select(null); + } catch (e) { + console.error("Failed to kill:", e); + } + }, + [selectedId, select, refreshInstances], + ); + + const handleOpenBrowser = useCallback( + async (id: string) => { + try { + await api.openBrowserWindow(id); + await refreshInstances(); + } catch (e) { + console.error("Failed to open browser:", e); + } + }, + [refreshInstances], + ); + + return ( + + ); +} diff --git a/crates/pardus-tauri/frontend/src/components/ChallengePanel.tsx 
b/crates/pardus-tauri/frontend/src/components/ChallengePanel.tsx new file mode 100644 index 0000000..74789f5 --- /dev/null +++ b/crates/pardus-tauri/frontend/src/components/ChallengePanel.tsx @@ -0,0 +1,106 @@ +import { useState, useEffect, useRef, useCallback } from "react"; +import type { ChallengeInfo } from "../types"; +import * as api from "../api/tauri"; + +interface ActiveChallenge { + url: string; + kinds: string[]; + riskScore: number; + resolvedAt: number | null; +} + +const RESOLVED_TTL = 10_000; +const MAX_CHALLENGES = 50; + +export function ChallengePanel() { + const [challenges, setChallenges] = useState([]); + const mountedRef = useRef(true); + const removeTimersRef = useRef>>(new Map()); + + const scheduleRemoval = useCallback((url: string) => { + const existing = removeTimersRef.current.get(url); + if (existing) clearTimeout(existing); + const timer = setTimeout(() => { + if (mountedRef.current) { + setChallenges((prev) => prev.filter((c) => c.url !== url)); + } + removeTimersRef.current.delete(url); + }, RESOLVED_TTL); + removeTimersRef.current.set(url, timer); + }, []); + + useEffect(() => { + mountedRef.current = true; + + const unsubPromises = Promise.all([ + api.onChallengeDetected((info) => { + if (!mountedRef.current) return; + setChallenges((prev) => { + const filtered = prev.filter((c) => c.url !== info.url); + if (filtered.length >= MAX_CHALLENGES) filtered.shift(); + return [ + ...filtered, + { url: info.url, kinds: info.kinds, riskScore: info.risk_score, resolvedAt: null }, + ]; + }); + }), + api.onChallengeSolved((info) => { + if (!mountedRef.current) return; + setChallenges((prev) => + prev.map((c) => (c.url === info.url ? 
{ ...c, resolvedAt: Date.now() } : c)), + ); + scheduleRemoval(info.url); + }), + api.onChallengeFailed((info) => { + if (!mountedRef.current) return; + setChallenges((prev) => prev.filter((c) => c.url !== info.challenge_url)); + }), + ]); + + return () => { + mountedRef.current = false; + removeTimersRef.current.forEach((t) => clearTimeout(t)); + removeTimersRef.current.clear(); + unsubPromises.then((unsubs) => unsubs.forEach((u) => u())); + }; + }, [scheduleRemoval]); + + const active = challenges.filter((c) => !c.resolvedAt); + + return ( +
+
+ Challenges + {active.length > 0 && ( + {active.length} active + )} +
+
+ {active.length === 0 && ( +
No active challenges
+ )} + {active.map((ch) => ( +
+
+ {"\u26A0"} + {ch.kinds.join(", ")} +
+
+ {ch.url} +
+
+ Risk: {ch.riskScore}/100 +
+ +
+ ))} +
+
+ ); +} diff --git a/crates/pardus-tauri/frontend/src/components/InstanceHeader.tsx b/crates/pardus-tauri/frontend/src/components/InstanceHeader.tsx new file mode 100644 index 0000000..8b03f84 --- /dev/null +++ b/crates/pardus-tauri/frontend/src/components/InstanceHeader.tsx @@ -0,0 +1,109 @@ +import { useState, useCallback, useEffect } from "react"; +import { useAgent } from "../context/AgentContext"; +import * as api from "../api/tauri"; + +export function InstanceHeader() { + const { instances, selectedId, refreshTree } = useAgent(); + const [url, setUrl] = useState(""); + const [navigating, setNavigating] = useState(false); + + const instance = instances.find((i) => i.id === selectedId); + + useEffect(() => { + if (instance?.current_url) { + setUrl(instance.current_url); + } else { + setUrl(""); + } + }, [selectedId, instance?.current_url]); + + const handleNavigate = useCallback(async () => { + if (!selectedId || !url.trim()) return; + let target = url.trim(); + if (!/^https?:\/\//i.test(target)) { + target = "https://" + target; + } + setNavigating(true); + try { + await api.executeCdp(selectedId, "Page.navigate", { url: target }); + await refreshTree(); + } catch (e) { + console.error("Navigate failed:", e); + } finally { + setNavigating(false); + } + }, [selectedId, url, refreshTree]); + + const handleReload = useCallback(async () => { + if (!selectedId) return; + try { + await api.executeCdp(selectedId, "Page.reload", {}); + await refreshTree(); + } catch (e) { + console.error("Reload failed:", e); + } + }, [selectedId, refreshTree]); + + if (!instance) { + return ( +
+ Pardus Mission Control + Spawn an agent to begin +
+ ); + } + + return ( +
+ Pardus +
+ setUrl(e.target.value)} + onKeyDown={(e) => e.key === "Enter" && handleNavigate()} + placeholder={instance.current_url ?? "Enter URL..."} + /> + + +
+
+ + + {instance.agent_status} + | + :{instance.port} +
+
+ ); +} diff --git a/crates/pardus-tauri/frontend/src/components/InteractionBar.tsx b/crates/pardus-tauri/frontend/src/components/InteractionBar.tsx new file mode 100644 index 0000000..9595e7c --- /dev/null +++ b/crates/pardus-tauri/frontend/src/components/InteractionBar.tsx @@ -0,0 +1,274 @@ +import { useState, useCallback, useRef, useEffect } from "react"; +import { useAgent } from "../context/AgentContext"; +import * as api from "../api/tauri"; + +interface LogEntry { + id: string; + text: string; + type: "command" | "result" | "error"; +} + +/** + * Split a line into tokens by whitespace, respecting double-quoted strings. + * Mirrors the REPL's split_tokens function (does NOT treat # as comment). + */ +function splitTokens(input: string): string[] { + const tokens: string[] = []; + let current = ""; + let inQuotes = false; + + for (const ch of input) { + if (ch === '"') { + inQuotes = !inQuotes; + } else if (/\s/.test(ch) && !inQuotes) { + if (current.length > 0) { + tokens.push(current); + current = ""; + } + } else { + current += ch; + } + } + if (current.length > 0) tokens.push(current); + return tokens; +} + +export function InteractionBar() { + const { selectedId, refreshTree, refreshInstances } = useAgent(); + const [input, setInput] = useState(""); + const [log, setLog] = useState([]); + const [history, setHistory] = useState([]); + const [historyIdx, setHistoryIdx] = useState(-1); + const scrollRef = useRef(null); + + useEffect(() => { + if (scrollRef.current) { + scrollRef.current.scrollTop = scrollRef.current.scrollHeight; + } + }, [log.length]); + + const addLog = useCallback((text: string, type: LogEntry["type"]) => { + setLog((prev) => [...prev.slice(-99), { id: `${Date.now()}-${Math.random()}`, text, type }]); + }, []); + + const handleExecute = useCallback( + async (cmd: string) => { + if (!selectedId || !cmd.trim()) return; + + addLog(`pardus> ${cmd}`, "command"); + setHistory((prev) => [...prev, cmd]); + setHistoryIdx(-1); + + const tokens = 
splitTokens(cmd.trim()); + if (tokens.length === 0) return; + + try { + switch (tokens[0]) { + // Navigation + case "visit": + case "open": { + if (tokens.length < 2) { addLog("Usage: visit ", "error"); return; } + await api.executeCdp(selectedId, "Page.navigate", { url: tokens[1] }); + addLog("Navigated", "result"); + refreshTree(); + break; + } + case "reload": { + await api.executeCdp(selectedId, "Page.reload", {}); + addLog("Reloaded", "result"); + refreshTree(); + break; + } + case "back": { + await api.executeCdp(selectedId, "Page.navigate", { url: "back" }); + addLog("Back", "result"); + refreshTree(); + break; + } + case "forward": { + await api.executeCdp(selectedId, "Page.navigate", { url: "forward" }); + addLog("Forward", "result"); + refreshTree(); + break; + } + + // Interactions + case "click": { + if (tokens.length < 2) { addLog("Usage: click ", "error"); return; } + const sel = tokens[1]; + const selector = sel.startsWith("#") && /^\d+$/.test(sel.slice(1)) + ? `#${sel.slice(1)}` : sel; + await api.executeCdp(selectedId, "Pardus.interact", { action: "click", selector }); + addLog(`Clicked ${sel}`, "result"); + refreshTree(); + break; + } + case "type": { + if (tokens.length < 3) { addLog("Usage: type ", "error"); return; } + const sel = tokens[1]; + const value = tokens.slice(2).join(" "); + await api.executeCdp(selectedId, "Pardus.interact", { + action: "type", selector: sel, value, + }); + addLog(`Typed '${value}' into ${sel}`, "result"); + break; + } + case "submit": { + if (tokens.length < 2) { addLog("Usage: submit [name=value ...]", "error"); return; } + const fields: Record = {}; + for (const f of tokens.slice(2)) { + const [k, ...v] = f.split("="); + if (v.length > 0) fields[k] = v.join("="); + else addLog(`Invalid field '${f}', expected name=value`, "error"); + } + await api.executeCdp(selectedId, "Pardus.interact", { + action: "submit", selector: tokens[1], fields, + }); + addLog(`Submitted ${tokens[1]}`, "result"); + refreshTree(); + 
break; + } + case "scroll": { + const dir = tokens[1] ?? "down"; + const px = dir === "up" ? -400 : dir === "to-top" ? -99999 : dir === "to-bottom" ? 99999 : 400; + await api.executeCdp(selectedId, "Runtime.evaluate", { + expression: `window.scrollBy(0, ${px})`, + }); + addLog(`Scrolled ${dir}`, "result"); + refreshTree(); + break; + } + case "wait": { + if (tokens.length < 2) { addLog("Usage: wait [timeout_ms]", "error"); return; } + const timeout = tokens[2] ? parseInt(tokens[2]) : 5000; + await api.executeCdp(selectedId, "Pardus.wait", { + condition: "selector", selector: tokens[1], timeoutMs: timeout, + }); + addLog(`Wait satisfied: ${tokens[1]}`, "result"); + break; + } + case "event": { + if (tokens.length < 3) { addLog("Usage: event [init_json]", "error"); return; } + await api.executeCdp(selectedId, "Pardus.interact", { + action: "event", selector: tokens[1], eventType: tokens[2], init: tokens[3], + }); + addLog(`Dispatched '${tokens[2]}' on ${tokens[1]}`, "result"); + break; + } + + // Tree / inspect + case "tree": + case "dom": { + refreshTree(); + addLog("Tree refreshed", "result"); + break; + } + + // Settings + case "js": { + const val = tokens[1]; + if (val === "on" || val === "true" || val === "1") { + addLog("JS enabled (applied on next navigation)", "result"); + } else if (val === "off" || val === "false" || val === "0") { + addLog("JS disabled", "result"); + } else { + addLog("JS is on by default", "result"); + } + break; + } + + // Help + case "help": + case "?": { + addLog([ + "Navigation: visit | reload | back | forward", + "Interact: click <#id|sel> | type <#id|sel> | submit [k=v..]", + " scroll [down|up|to-top|to-bottom] | wait [ms] | event ", + "Inspect: tree", + "Settings: js [on|off] | help", + "Exit: exit", + ].join("\n"), "result"); + break; + } + + case "exit": + case "quit": { + addLog("Use the sidebar kill button to stop the agent", "result"); + break; + } + + default: + addLog(`Unknown command: ${tokens[0]}. 
Type "help" for available commands.`, "error"); + } + } catch (e) { + addLog(String(e), "error"); + } + }, + [selectedId, addLog, refreshTree, refreshInstances], + ); + + const handleKeyDown = (e: React.KeyboardEvent) => { + if (e.key === "Enter") { + handleExecute(input); + setInput(""); + } else if (e.key === "ArrowUp") { + e.preventDefault(); + if (history.length > 0) { + const newIdx = historyIdx < 0 ? history.length - 1 : Math.max(0, historyIdx - 1); + setHistoryIdx(newIdx); + setInput(history[newIdx]); + } + } else if (e.key === "ArrowDown") { + e.preventDefault(); + if (historyIdx >= 0) { + const newIdx = historyIdx + 1; + if (newIdx >= history.length) { + setHistoryIdx(-1); + setInput(""); + } else { + setHistoryIdx(newIdx); + setInput(history[newIdx]); + } + } + } + }; + + if (!selectedId) { + return
Spawn an agent to start
; + } + + return ( +
+
+ {log.length === 0 && ( +
+ pardus-browser repl — type "help" for commands +
+ )} + {log.map((entry) => ( +
+ {entry.type === "command" ? ( + {entry.text} + ) : entry.type === "error" ? ( + {entry.text} + ) : ( + {entry.text} + )} +
+ ))} +
+
+ pardus> + setInput(e.target.value)} + onKeyDown={handleKeyDown} + placeholder="visit https://example.com" + spellCheck={false} + /> +
+
+ ); +} diff --git a/crates/pardus-tauri/frontend/src/components/TreeViewer.tsx b/crates/pardus-tauri/frontend/src/components/TreeViewer.tsx new file mode 100644 index 0000000..8b37d2e --- /dev/null +++ b/crates/pardus-tauri/frontend/src/components/TreeViewer.tsx @@ -0,0 +1,163 @@ +import { useState, useCallback } from "react"; +import { useAgent } from "../context/AgentContext"; +import type { SemanticNode } from "../types"; +import * as api from "../api/tauri"; + +type Filter = "all" | "interactive"; + +function hasInteractiveDescendant(node: SemanticNode): boolean { + if (node.interactive) return true; + return node.children.some(hasInteractiveDescendant); +} + +function formatRole(role: string): string { + if (role.startsWith("heading")) return role; + return role.charAt(0).toUpperCase() + role.slice(1); +} + +function TreeNode({ + node, + depth, + filter, + onAction, +}: { + node: SemanticNode; + depth: number; + filter: Filter; + onAction: (node: SemanticNode) => void; +}) { + const [expanded, setExpanded] = useState(depth < 2); + + if (filter === "interactive" && !node.interactive && !hasInteractiveDescendant(node)) { + return null; + } + + const hasChildren = node.children.length > 0; + const actionLabel = node.action ? node.action : null; + + return ( +
+
+ {hasChildren ? ( + + ) : ( + + )} + node.interactive && onAction(node)}> + {formatRole(node.role)} + + {node.element_id != null && ( + onAction(node)} title={`Element #${node.element_id}`}> + #{node.element_id} + + )} + {node.name && "{node.name}"} + {node.tag} + {actionLabel && ( + + {actionLabel} + + )} + {node.href && ( + + {"\u2192"} {node.href.length > 40 ? node.href.slice(0, 40) + "..." : node.href} + + )} +
+ {expanded && hasChildren && ( +
+ {node.children.map((child, i) => ( + + ))} +
+ )} +
+ ); +} + +export function TreeViewer() { + const { tree, stats, selectedId, loading } = useAgent(); + const [filter, setFilter] = useState("interactive"); + + const handleAction = useCallback( + async (node: SemanticNode) => { + if (!selectedId || !node.interactive) return; + + const action = node.action; + const selector = node.selector ?? (node.element_id != null ? `#${node.element_id}` : undefined); + + if (!action || !selector) return; + + if (action === "navigate" && node.href) { + try { + await api.executeCdp(selectedId, "Page.navigate", { url: node.href }); + } catch (e) { + console.error("Navigate failed:", e); + } + } else { + try { + await api.executeCdp(selectedId, "Pardus.interact", { + action: action === "fill" ? "type" : action, + selector, + }); + } catch (e) { + console.error("Action failed:", e); + } + } + }, + [selectedId], + ); + + if (!tree) { + return ( +
+
+ + +
+
+ {loading ? "Loading..." : "Select an agent and navigate to see the semantic tree"} +
+
+ ); + } + + return ( +
+
+ + + {stats && ( + + {stats.landmarks}L {stats.links}lnk {stats.headings}H {stats.actions}act + + )} +
+
+ +
+
+ ); +} diff --git a/crates/pardus-tauri/frontend/src/context/AgentContext.tsx b/crates/pardus-tauri/frontend/src/context/AgentContext.tsx new file mode 100644 index 0000000..213a24a --- /dev/null +++ b/crates/pardus-tauri/frontend/src/context/AgentContext.tsx @@ -0,0 +1,169 @@ +import { + createContext, + useContext, + useState, + useCallback, + useEffect, + useRef, + type ReactNode, +} from "react"; +import type { + InstanceInfo, + CdpEvent, + SemanticNode, + TreeStats, +} from "../types"; +import * as api from "../api/tauri"; + +interface AgentContextValue { + instances: InstanceInfo[]; + selectedId: string | null; + select: (id: string | null) => void; + tree: SemanticNode | null; + stats: TreeStats | null; + events: CdpEvent[]; + refreshInstances: () => Promise; + refreshTree: () => Promise; + loading: boolean; +} + +const AgentContext = createContext(null); + +export function useAgent(): AgentContextValue { + const ctx = useContext(AgentContext); + if (!ctx) throw new Error("useAgent must be used within AgentProvider"); + return ctx; +} + +const MAX_EVENTS = 1000; + +function debounce void>( + fn: T, + ms: number, +): (...args: Parameters) => void { + let timer: ReturnType; + return (...args: Parameters) => { + clearTimeout(timer); + timer = setTimeout(() => fn(...args), ms); + }; +} + +export function AgentProvider({ children }: { children: ReactNode }) { + const [instances, setInstances] = useState([]); + const [selectedId, setSelectedId] = useState(null); + const [tree, setTree] = useState(null); + const [stats, setStats] = useState(null); + const [events, setEvents] = useState([]); + const [loading, setLoading] = useState(false); + const eventsRef = useRef([]); + const mountedRef = useRef(true); + + const refreshInstances = useCallback(async () => { + try { + const list = await api.listInstances(); + setInstances(list); + } catch { + console.error("Failed to refresh instances"); + } + }, []); + + const refreshTree = useCallback(async () => { + if 
(!selectedId) return; + try { + setLoading(true); + const result = await api.getSemanticTree(selectedId); + setTree(result.semanticTree.root); + setStats(result.semanticTree.stats); + } catch { + setTree(null); + setStats(null); + } finally { + setLoading(false); + } + }, [selectedId]); + + const debouncedRefreshTree = useRef( + debounce(() => { + refreshTree(); + }, 300), + ); + + useEffect(() => { + refreshInstances(); + const interval = setInterval(refreshInstances, 3000); + return () => clearInterval(interval); + }, [refreshInstances]); + + useEffect(() => { + if (!selectedId) { + setTree(null); + setStats(null); + setEvents([]); + eventsRef.current = []; + return; + } + + mountedRef.current = true; + refreshTree(); + + const unsubPromises = Promise.all([ + api.onCdpEvent((event) => { + if (!mountedRef.current) return; + if (event.instance_id !== selectedId) return; + + const updated = [...eventsRef.current, event]; + if (updated.length > MAX_EVENTS) { + updated.splice(0, updated.length - MAX_EVENTS); + } + eventsRef.current = updated; + setEvents(updated); + + if (event.method === "Page.frameNavigated" || event.method.startsWith("Pardus.action")) { + debouncedRefreshTree.current(); + } + }), + api.onAgentStatusChanged((change) => { + if (!mountedRef.current) return; + if (change.instance_id === selectedId) { + refreshInstances(); + } + }), + ]); + + return () => { + mountedRef.current = false; + unsubPromises.then((unsubs) => { + if (unsubs.length > 0) { + unsubs.forEach((u) => u()); + } + }); + }; + }, [selectedId, refreshInstances, refreshTree]); + + const select = useCallback( + (id: string | null) => { + setSelectedId(id); + setEvents([]); + eventsRef.current = []; + }, + [], + ); + + return ( + + {children} + + ); +} diff --git a/crates/pardus-tauri/frontend/src/index.css b/crates/pardus-tauri/frontend/src/index.css new file mode 100644 index 0000000..ccfee75 --- /dev/null +++ b/crates/pardus-tauri/frontend/src/index.css @@ -0,0 +1,642 @@ +:root { + --bg: 
#1a1b26; + --bg-surface: #24283b; + --bg-hover: #2f334d; + --border: #3b4261; + --text: #a9b1d6; + --text-bright: #c0caf5; + --text-muted: #565f89; + --accent: #7aa2f7; + --accent-dim: #3d59a1; + --green: #9ece6a; + --red: #f7768e; + --yellow: #e0af68; + --orange: #ff9e64; + --purple: #bb9af7; + --cyan: #7dcfff; + + --font-mono: "JetBrains Mono", "SF Mono", "Fira Code", "Cascadia Code", monospace; + --font-sans: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; + + --radius: 4px; + --radius-sm: 3px; +} + +*, *::before, *::after { + box-sizing: border-box; + margin: 0; + padding: 0; +} + +html, body { + height: 100%; + overflow: hidden; + font-family: var(--font-sans); + font-size: 13px; + color: var(--text); + background: var(--bg); + -webkit-font-smoothing: antialiased; +} + +::-webkit-scrollbar { + width: 6px; + height: 6px; +} +::-webkit-scrollbar-track { + background: transparent; +} +::-webkit-scrollbar-thumb { + background: var(--border); + border-radius: 3px; +} +::-webkit-scrollbar-thumb:hover { + background: var(--text-muted); +} + +.app { + display: flex; + flex-direction: column; + height: 100vh; + overflow: hidden; +} + +.instance-header { + display: flex; + align-items: center; + gap: 12px; + padding: 6px 12px; + background: var(--bg-surface); + border-bottom: 1px solid var(--border); + flex-shrink: 0; + height: 42px; +} + +.brand { + font-weight: 700; + font-size: 14px; + color: var(--accent); + white-space: nowrap; +} + +.nav-bar { + display: flex; + gap: 6px; + flex: 1; + max-width: 600px; +} + +.nav-input { + flex: 1; + height: 28px; + background: var(--bg); + border: 1px solid var(--border); + border-radius: var(--radius); + color: var(--text-bright); + padding: 0 8px; + font-family: var(--font-mono); + font-size: 12px; + outline: none; +} +.nav-input:focus { + border-color: var(--accent); +} +.nav-input::placeholder { + color: var(--text-muted); +} + +.header-meta { + display: flex; + align-items: center; + gap: 
6px; + margin-left: auto; + font-size: 11px; +} + +.meta-text { + color: var(--text-muted); +} + +.meta-sep { + color: var(--border); +} + +.header-hint { + color: var(--text-muted); + font-size: 12px; + margin-left: auto; +} + +.status-dot { + width: 8px; + height: 8px; + border-radius: 50%; + flex-shrink: 0; +} +.status-dot.small { + width: 6px; + height: 6px; +} + +.app-main { + display: flex; + flex: 1; + overflow: hidden; +} + +.sidebar { + width: 200px; + flex-shrink: 0; + background: var(--bg-surface); + border-right: 1px solid var(--border); + display: flex; + flex-direction: column; + overflow: hidden; +} + +.sidebar-right { + width: 220px; + flex-shrink: 0; + border-right: none; + border-left: 1px solid var(--border); +} + +.panel-header { + display: flex; + align-items: center; + justify-content: space-between; + padding: 8px 10px; + border-bottom: 1px solid var(--border); + flex-shrink: 0; +} + +.panel-title { + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--text-muted); +} + +.btn { + padding: 4px 10px; + border: 1px solid var(--border); + border-radius: var(--radius); + background: var(--bg-surface); + color: var(--text); + font-size: 12px; + cursor: pointer; + white-space: nowrap; +} +.btn:hover { + background: var(--bg-hover); +} +.btn:disabled { + opacity: 0.5; + cursor: not-allowed; +} + +.btn-sm { + padding: 2px 8px; + font-size: 11px; + height: 24px; +} + +.btn-primary { + background: var(--accent-dim); + border-color: var(--accent); + color: var(--text-bright); +} +.btn-primary:hover { + background: var(--accent); + color: #fff; +} + +.btn-active { + background: var(--accent-dim); + border-color: var(--accent); + color: var(--text-bright); +} + +.btn-icon { + background: none; + border: none; + color: var(--text-muted); + cursor: pointer; + padding: 2px 4px; + border-radius: var(--radius-sm); + font-size: 14px; + line-height: 1; +} +.btn-icon:hover { + color: var(--text); + background: 
var(--bg-hover); +} +.btn-icon-sm { + font-size: 11px; + padding: 1px 4px; +} + +.badge { + font-size: 10px; + padding: 1px 6px; + border-radius: 10px; + font-weight: 600; +} +.badge-warning { + background: rgba(255, 158, 100, 0.15); + color: var(--orange); +} + +.agent-list { + flex: 1; + overflow-y: auto; + padding: 4px; +} + +.agent-empty { + padding: 16px 10px; + color: var(--text-muted); + font-size: 12px; + line-height: 1.5; +} + +.agent-card { + padding: 8px; + border-radius: var(--radius); + cursor: pointer; + margin-bottom: 2px; + border: 1px solid transparent; + transition: background 0.1s; +} +.agent-card:hover { + background: var(--bg-hover); +} +.agent-card-selected { + background: var(--bg-hover); + border-color: var(--accent-dim); +} + +.agent-card-header { + display: flex; + align-items: center; + gap: 6px; +} + +.agent-card-id { + font-family: var(--font-mono); + font-size: 11px; + color: var(--text-bright); + flex: 1; +} + +.agent-card-status { + font-size: 10px; + color: var(--text-muted); + text-transform: uppercase; +} + +.agent-card-url { + font-family: var(--font-mono); + font-size: 10px; + color: var(--text-muted); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + margin-top: 4px; + padding-left: 14px; +} + +.center { + flex: 1; + display: flex; + flex-direction: column; + overflow: hidden; + min-width: 0; +} + +.panel-split { + flex: 1; + display: flex; + overflow: hidden; +} + +.panel { + display: flex; + flex-direction: column; + overflow: hidden; +} + +.panel-tree { + flex: 1; + border-right: 1px solid var(--border); +} + +.panel-log { + width: 320px; + flex-shrink: 0; +} + +.tree-panel, +.action-log, +.challenge-panel, +.interaction-bar { + display: flex; + flex-direction: column; + overflow: hidden; + height: 100%; +} + +.tree-toolbar { + display: flex; + align-items: center; + gap: 6px; + padding: 6px 10px; + border-bottom: 1px solid var(--border); + flex-shrink: 0; +} + +.tree-stats { + margin-left: auto; + 
font-size: 10px; + font-family: var(--font-mono); + color: var(--text-muted); +} + +.tree-content { + flex: 1; + overflow-y: auto; + padding: 4px 0; +} + +.tree-node { + user-select: none; +} + +.tree-node-row { + display: flex; + align-items: center; + gap: 4px; + padding: 2px 6px; + height: 22px; + font-size: 11px; + font-family: var(--font-mono); + white-space: nowrap; +} +.tree-node-row:hover { + background: var(--bg-hover); +} + +.tree-node-interactive { + cursor: pointer; +} +.tree-node-interactive:hover .tree-role, +.tree-node-interactive:hover .tree-eid { + color: var(--accent); +} + +.tree-toggle { + background: none; + border: none; + color: var(--text-muted); + cursor: pointer; + font-size: 10px; + width: 14px; + text-align: center; + padding: 0; + flex-shrink: 0; +} + +.tree-toggle-spacer { + width: 14px; + flex-shrink: 0; +} + +.tree-role { + color: var(--purple); +} +.tree-eid { + color: var(--accent); + font-size: 10px; + opacity: 0.8; +} +.tree-name { + color: var(--green); + overflow: hidden; + text-overflow: ellipsis; + max-width: 200px; +} +.tree-tag { + color: var(--text-muted); + font-size: 10px; +} +.tree-action { + color: var(--cyan); + font-size: 9px; + text-transform: uppercase; +} +.tree-href { + color: var(--cyan); + opacity: 0.7; + font-size: 10px; + overflow: hidden; + text-overflow: ellipsis; + max-width: 180px; +} + +.tree-empty, +.log-empty { + flex: 1; + display: flex; + align-items: center; + justify-content: center; + color: var(--text-muted); + font-size: 12px; +} + +.action-log-toolbar { + display: flex; + align-items: center; + gap: 8px; + padding: 6px 10px; + border-bottom: 1px solid var(--border); + flex-shrink: 0; +} + +.log-count { + font-family: var(--font-mono); + font-size: 10px; + color: var(--text-muted); + background: var(--bg); + padding: 1px 6px; + border-radius: 10px; +} + +.auto-scroll-label { + margin-left: auto; + font-size: 10px; + color: var(--text-muted); + display: flex; + align-items: center; + gap: 4px; + 
cursor: pointer; +} +.auto-scroll-label input { + margin: 0; +} + +.action-log-entries { + flex: 1; + overflow-y: auto; + padding: 4px 0; +} + +.log-entry { + display: flex; + align-items: baseline; + gap: 6px; + padding: 2px 10px; + font-size: 11px; + font-family: var(--font-mono); + line-height: 1.5; +} + +.log-time { + color: var(--text-muted); + font-size: 10px; + flex-shrink: 0; +} + +.log-icon { + flex-shrink: 0; + width: 14px; + text-align: center; + font-size: 12px; +} + +.log-summary { + color: var(--text); + flex-shrink: 0; +} + +.log-detail { + color: var(--text-muted); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + font-size: 10px; +} + +.log-navigate .log-icon { color: var(--accent); } +.log-action_start .log-icon { color: var(--cyan); } +.log-action_complete .log-icon { color: var(--green); } +.log-action_fail .log-icon { color: var(--red); } +.log-action_fail .log-summary { color: var(--red); } + +.interaction-bar { + border-top: 1px solid var(--border); + height: 140px; + flex-shrink: 0; +} + +.interaction-log { + flex: 1; + overflow-y: auto; + padding: 4px 10px; +} + +.interaction-log .log-entry { + font-size: 11px; + line-height: 1.4; +} + +.log-cmd { + color: var(--cyan); +} +.log-res { + color: var(--text); +} +.log-err { + color: var(--red); +} + +.interaction-input-row { + display: flex; + align-items: center; + padding: 4px 10px; + border-top: 1px solid var(--border); + flex-shrink: 0; +} + +.prompt { + color: var(--accent); + font-family: var(--font-mono); + font-size: 12px; + font-weight: 700; + margin-right: 6px; +} + +.interaction-input { + flex: 1; + background: none; + border: none; + color: var(--text-bright); + font-family: var(--font-mono); + font-size: 12px; + outline: none; +} +.interaction-input::placeholder { + color: var(--text-muted); +} + +.challenge-list { + flex: 1; + overflow-y: auto; + padding: 4px; +} + +.challenge-empty { + padding: 16px 10px; + color: var(--text-muted); + font-size: 12px; + 
text-align: center; +} + +.challenge-card { + padding: 8px; + border: 1px solid var(--border); + border-radius: var(--radius); + margin-bottom: 4px; + background: rgba(255, 158, 100, 0.05); +} + +.challenge-card-header { + display: flex; + align-items: center; + gap: 6px; + margin-bottom: 4px; +} + +.challenge-icon { + color: var(--orange); +} + +.challenge-types { + font-size: 11px; + color: var(--orange); + font-weight: 600; +} + +.challenge-url { + font-family: var(--font-mono); + font-size: 10px; + color: var(--text-muted); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; +} + +.challenge-risk { + font-size: 10px; + color: var(--text-muted); + margin-top: 2px; +} diff --git a/crates/pardus-tauri/frontend/src/main.tsx b/crates/pardus-tauri/frontend/src/main.tsx new file mode 100644 index 0000000..2d79d5f --- /dev/null +++ b/crates/pardus-tauri/frontend/src/main.tsx @@ -0,0 +1,10 @@ +import { StrictMode } from "react"; +import { createRoot } from "react-dom/client"; +import { App } from "./App"; +import "./index.css"; + +createRoot(document.getElementById("root")!).render( + + + , +); diff --git a/crates/pardus-tauri/frontend/src/types.ts b/crates/pardus-tauri/frontend/src/types.ts new file mode 100644 index 0000000..3c44339 --- /dev/null +++ b/crates/pardus-tauri/frontend/src/types.ts @@ -0,0 +1,79 @@ +export interface InstanceInfo { + id: string; + port: number; + ws_url: string; + running: boolean; + browser_window_open: boolean; + current_url: string | null; + agent_status: AgentStatus; +} + +export type AgentStatus = + | "idle" + | "connected" + | "running" + | "paused" + | "waiting-challenge" + | "error"; + +export type BridgeStatus = + | "Connecting" + | "Connected" + | "Reconnecting" + | "Disconnected" + | "Failed"; + +export interface SemanticNode { + role: string; + name: string | null; + tag: string; + interactive: boolean; + is_disabled?: boolean; + href?: string; + action?: string; + element_id?: number; + selector?: string; + 
input_type?: string; + placeholder?: string; + is_required?: boolean; + options?: Array<{ value: string; label: string }>; + children: SemanticNode[]; +} + +export interface SemanticTree { + semanticTree: { + root: SemanticNode; + stats: TreeStats; + }; +} + +export interface TreeStats { + landmarks: number; + links: number; + headings: number; + actions: number; + forms: number; + images: number; + iframes: number; + total_nodes: number; +} + +export interface CdpEvent { + instance_id: string; + method: string; + params: Record; + timestamp: number; +} + +export interface ChallengeInfo { + url: string; + status: number; + kinds: string[]; + risk_score: number; +} + +export interface StatusChange { + instance_id: string; + old_status: AgentStatus; + new_status: AgentStatus; +} diff --git a/crates/pardus-tauri/frontend/tsconfig.json b/crates/pardus-tauri/frontend/tsconfig.json new file mode 100644 index 0000000..dd02c6e --- /dev/null +++ b/crates/pardus-tauri/frontend/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "bundler", + "strict": true, + "noEmit": true, + "jsx": "react-jsx", + "skipLibCheck": true, + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true, + "isolatedModules": true + }, + "include": ["src"] +} diff --git a/crates/pardus-tauri/frontend/tsconfig.tsbuildinfo b/crates/pardus-tauri/frontend/tsconfig.tsbuildinfo new file mode 100644 index 0000000..e89d352 --- /dev/null +++ b/crates/pardus-tauri/frontend/tsconfig.tsbuildinfo @@ -0,0 +1 @@ +{"root":["./src/app.tsx","./src/main.tsx","./src/types.ts","./src/api/tauri.ts","./src/components/actionlog.tsx","./src/components/agentsidebar.tsx","./src/components/challengepanel.tsx","./src/components/instanceheader.tsx","./src/components/interactionbar.tsx","./src/components/treeviewer.tsx","./src/context/agentcontext.tsx"],"version":"5.7.3"} \ No newline at end of file diff --git 
a/crates/pardus-tauri/frontend/vite.config.ts b/crates/pardus-tauri/frontend/vite.config.ts new file mode 100644 index 0000000..9159634 --- /dev/null +++ b/crates/pardus-tauri/frontend/vite.config.ts @@ -0,0 +1,18 @@ +import { defineConfig } from "vite"; +import react from "@vitejs/plugin-react"; + +export default defineConfig({ + plugins: [react()], + build: { + outDir: "dist", + emptyOutDir: true, + }, + clearScreen: false, + server: { + port: 1420, + strictPort: true, + watch: { + ignored: ["**/src-tauri/**"], + }, + }, +}); diff --git a/crates/pardus-tauri/package.json b/crates/pardus-tauri/package.json index 9cf86bc..686d978 100644 --- a/crates/pardus-tauri/package.json +++ b/crates/pardus-tauri/package.json @@ -4,15 +4,14 @@ "version": "0.1.0", "type": "module", "scripts": { - "build": "esbuild src/main.ts --bundle --outfile=dist/bundle.js --format=esm --target=es2022 --platform=browser && cp src/index.html dist/index.html", - "dev": "esbuild src/main.ts --bundle --outfile=dist/bundle.js --format=esm --target=es2022 --platform=browser --watch", - "typecheck": "tsc --noEmit" + "build": "cd frontend && npm run build", + "dev": "cd frontend && npm run dev", + "typecheck": "cd frontend && tsc --noEmit" }, "dependencies": { "@tauri-apps/api": "^2.0.0" }, "devDependencies": { - "esbuild": "^0.28.0", "typescript": "^5.5.0" } } diff --git a/crates/pardus-tauri/src-tauri/Cargo.toml b/crates/pardus-tauri/src-tauri/Cargo.toml index 1082fb8..7337917 100644 --- a/crates/pardus-tauri/src-tauri/Cargo.toml +++ b/crates/pardus-tauri/src-tauri/Cargo.toml @@ -22,8 +22,10 @@ tracing-subscriber = { workspace = true, features = ["env-filter"] } anyhow = { workspace = true } async-trait = { workspace = true } url = { workspace = true } +chrono = { workspace = true } tokio-tungstenite = "0.26" +tokio-util = "0.7" futures-util = { workspace = true } pardus-core = { path = "../../pardus-core" } diff --git a/crates/pardus-tauri/src-tauri/src/cdp_bridge.rs 
b/crates/pardus-tauri/src-tauri/src/cdp_bridge.rs new file mode 100644 index 0000000..6d01c3e --- /dev/null +++ b/crates/pardus-tauri/src-tauri/src/cdp_bridge.rs @@ -0,0 +1,443 @@ +use std::collections::{HashMap, VecDeque}; +use std::sync::Arc; + +use futures_util::{SinkExt, StreamExt}; +use serde::{Deserialize, Serialize}; +use tauri::{Emitter, AppHandle}; +use tokio::sync::{mpsc, oneshot, RwLock}; +use tokio_tungstenite::tungstenite; + +const EVENT_BUFFER_SIZE: usize = 500; +const RECONNECT_BASE_MS: u64 = 1000; +const RECONNECT_MAX_MS: u64 = 30000; +const INIT_COMMAND_COUNT: u64 = 4; +type WsStream = tokio_tungstenite::WebSocketStream< + tokio_tungstenite::MaybeTlsStream, +>; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CdpEventRecord { + pub method: String, + pub params: serde_json::Value, + pub timestamp: i64, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub enum BridgeStatus { + Connecting, + Connected, + Reconnecting, + Disconnected, + Failed, +} + +struct InstanceBridge { + #[allow(dead_code)] + port: u16, + ws_write: Option>, + event_buffer: VecDeque, + last_activity: std::time::Instant, + status: BridgeStatus, + cancel: tokio_util::sync::CancellationToken, + pending_commands: HashMap>, + next_command_id: u64, +} + +pub struct CdpBridge { + instances: Arc>>, +} + +impl CdpBridge { + pub fn new() -> Self { + Self { + instances: Arc::new(RwLock::new(HashMap::new())), + } + } + + pub async fn connect( + &self, + instance_id: String, + port: u16, + app_handle: tauri::AppHandle, + ) { + let mut instances = self.instances.write().await; + if instances.contains_key(&instance_id) { + return; + } + + let bridge = InstanceBridge { + port, + ws_write: None, + event_buffer: VecDeque::with_capacity(EVENT_BUFFER_SIZE), + last_activity: std::time::Instant::now(), + status: BridgeStatus::Connecting, + cancel: tokio_util::sync::CancellationToken::new(), + pending_commands: HashMap::new(), + next_command_id: INIT_COMMAND_COUNT + 1, 
+ }; + + let cancel = bridge.cancel.clone(); + instances.insert(instance_id.clone(), bridge); + drop(instances); + + let instances_ref = self.instances.clone(); + let ws_url = format!("ws://127.0.0.1:{}", port); + + tokio::spawn(async move { + run_bridge_loop( + instances_ref, + instance_id, + port, + &ws_url, + app_handle, + cancel, + ) + .await; + }); + } + + pub async fn disconnect(&self, instance_id: &str) { + let mut instances = self.instances.write().await; + if let Some(mut bridge) = instances.remove(instance_id) { + bridge.cancel.cancel(); + for (_, tx) in bridge.pending_commands.drain() { + let _ = tx.send(serde_json::json!({ "error": "bridge disconnected" })); + } + } + } + + pub async fn send_command( + &self, + instance_id: &str, + method: String, + params: serde_json::Value, + ) -> Result { + let ws_write = { + let instances = self.instances.read().await; + let bridge = instances + .get(instance_id) + .ok_or_else(|| format!("no bridge for instance '{}'", instance_id))?; + bridge + .ws_write + .as_ref() + .ok_or_else(|| "bridge not connected".to_string())? + .clone() + }; + + let cmd_id = { + let mut instances = self.instances.write().await; + let bridge = instances + .get_mut(instance_id) + .ok_or_else(|| format!("no bridge for instance '{}'", instance_id))?; + let id = bridge.next_command_id; + bridge.next_command_id += 1; + id + }; + + let (tx, rx) = oneshot::channel(); + + { + let mut instances = self.instances.write().await; + if let Some(bridge) = instances.get_mut(instance_id) { + bridge.pending_commands.insert(cmd_id, tx); + } + } + + let msg = serde_json::json!({ + "id": cmd_id, + "method": method, + "params": params, + }); + + ws_write + .send(msg.to_string()) + .await + .map_err(|e| format!("failed to send command: {}", e))?; + + tokio::time::timeout(std::time::Duration::from_secs(30), rx) + .await + .map_err(|_| "command timed out".to_string())? 
+ .map_err(|_| "command response channel dropped".to_string()) + } + + pub async fn get_events( + &self, + instance_id: &str, + limit: usize, + since: Option, + ) -> Vec { + let instances = self.instances.read().await; + if let Some(bridge) = instances.get(instance_id) { + let mut filtered: Vec = bridge + .event_buffer + .iter() + .filter(|e| since.map_or(true, |t| e.timestamp > t)) + .cloned() + .collect(); + let start = filtered.len().saturating_sub(limit); + filtered.drain(..start); + filtered + } else { + Vec::new() + } + } + + pub async fn get_status(&self, instance_id: &str) -> Option { + let instances = self.instances.read().await; + instances.get(instance_id).map(|b| b.status.clone()) + } + + pub async fn touch_activity(&self, instance_id: &str) { + let mut instances = self.instances.write().await; + if let Some(bridge) = instances.get_mut(instance_id) { + bridge.last_activity = std::time::Instant::now(); + } + } +} + +async fn run_bridge_loop( + instances: Arc>>, + instance_id: String, + port: u16, + ws_url: &str, + app_handle: tauri::AppHandle, + cancel: tokio_util::sync::CancellationToken, +) { + let mut reconnect_delay = RECONNECT_BASE_MS; + + loop { + if cancel.is_cancelled() { + break; + } + + update_status(&instances, &instance_id, BridgeStatus::Connecting).await; + + match tokio_tungstenite::connect_async(ws_url).await { + Ok((ws_stream, _)) => { + reconnect_delay = RECONNECT_BASE_MS; + update_status(&instances, &instance_id, BridgeStatus::Connected).await; + + let _ = app_handle.emit( + "cdp-bridge-connected", + serde_json::json!({ + "instance_id": instance_id, + "port": port, + }), + ); + + if let Err(e) = run_connected( + &instances, + &instance_id, + ws_stream, + &app_handle, + &cancel, + ) + .await + { + tracing::warn!( + instance_id = %instance_id, + error = %e, + "CDP bridge connection lost" + ); + } + + clear_ws_write(&instances, &instance_id).await; + + if cancel.is_cancelled() { + break; + } + + update_status(&instances, &instance_id, 
BridgeStatus::Reconnecting).await; + + let _ = app_handle.emit( + "cdp-bridge-disconnected", + serde_json::json!({ + "instance_id": instance_id, + "port": port, + }), + ); + } + Err(e) => { + tracing::warn!( + instance_id = %instance_id, + error = %e, + delay_ms = reconnect_delay, + "CDP bridge connection failed, retrying" + ); + } + } + + tokio::select! { + _ = tokio::time::sleep(std::time::Duration::from_millis(reconnect_delay)) => {}, + _ = cancel.cancelled() => break, + } + + reconnect_delay = (reconnect_delay * 2).min(RECONNECT_MAX_MS); + } + + update_status(&instances, &instance_id, BridgeStatus::Disconnected).await; +} + +async fn run_connected( + instances: &Arc>>, + instance_id: &str, + ws_stream: WsStream, + app_handle: &AppHandle, + cancel: &tokio_util::sync::CancellationToken, +) -> Result<(), String> { + let (mut ws_sink, mut ws_stream) = ws_stream.split(); + + let (cmd_tx, mut cmd_rx) = mpsc::channel::(256); + { + let mut instances = instances.write().await; + if let Some(bridge) = instances.get_mut(instance_id) { + bridge.ws_write = Some(cmd_tx); + } + } + + let init_commands = [ + r#"{"id":1,"method":"Target.setDiscoverTargets","params":{"discover":true}}"#, + r#"{"id":2,"method":"Page.enable","params":{}}"#, + r#"{"id":3,"method":"Network.enable","params":{"maxTotalBufferSize":10000000,"maxResourceBufferSize":5000000}}"#, + r#"{"id":4,"method":"Pardus.enable","params":{}}"#, + ]; + + for cmd in &init_commands { + if cancel.is_cancelled() { + return Ok(()); + } + ws_sink + .send(tungstenite::Message::Text((*cmd).into())) + .await + .map_err(|e| e.to_string())?; + } + + loop { + if cancel.is_cancelled() { + return Ok(()); + } + + tokio::select! 
{ + msg = ws_stream.next() => { + match msg { + Some(Ok(tungstenite::Message::Text(text))) => { + handle_cdp_message(&text, instances, instance_id, app_handle).await; + } + Some(Ok(tungstenite::Message::Ping(data))) => { + let _ = ws_sink.send(tungstenite::Message::Pong(data)).await; + } + Some(Ok(tungstenite::Message::Close(_))) | None => { + return Err("connection closed".to_string()); + } + Some(Ok(tungstenite::Message::Binary(data))) => { + if let Ok(text) = String::from_utf8(data.to_vec()) { + handle_cdp_message(&text, instances, instance_id, app_handle).await; + } + } + Some(Err(e)) => { + return Err(format!("websocket error: {}", e)); + } + _ => {} + } + } + cmd = cmd_rx.recv() => { + match cmd { + Some(text) => { + ws_sink + .send(tungstenite::Message::Text(text.into())) + .await + .map_err(|e| format!("failed to send: {}", e))?; + } + None => { + return Err("command channel closed".to_string()); + } + } + } + _ = cancel.cancelled() => { + return Ok(()); + } + } + } +} + +async fn clear_ws_write( + instances: &Arc>>, + instance_id: &str, +) { + let mut instances = instances.write().await; + if let Some(bridge) = instances.get_mut(instance_id) { + bridge.ws_write = None; + } +} + +async fn handle_cdp_message( + text: &str, + instances: &Arc>>, + instance_id: &str, + app_handle: &AppHandle, +) { + let value: serde_json::Value = match serde_json::from_str(text) { + Ok(v) => v, + Err(_) => return, + }; + + if value.get("id").is_some() { + let cmd_id = value["id"].as_u64().unwrap_or(0); + if cmd_id <= INIT_COMMAND_COUNT { + return; + } + let mut instances = instances.write().await; + if let Some(bridge) = instances.get_mut(instance_id) { + if let Some(tx) = bridge.pending_commands.remove(&cmd_id) { + let _ = tx.send(value); + } + } + return; + } + + let method = match value["method"].as_str() { + Some(m) => m.to_string(), + None => return, + }; + + let timestamp = chrono::Utc::now().timestamp_millis(); + + let record = CdpEventRecord { + method: method.clone(), 
+ params: value.get("params").cloned().unwrap_or(serde_json::json!({})), + timestamp, + }; + + { + let mut instances = instances.write().await; + if let Some(bridge) = instances.get_mut(instance_id) { + bridge.last_activity = std::time::Instant::now(); + + if bridge.event_buffer.len() >= EVENT_BUFFER_SIZE { + bridge.event_buffer.pop_front(); + } + bridge.event_buffer.push_back(record.clone()); + } + } + + let _ = app_handle.emit( + "cdp-event", + serde_json::json!({ + "instance_id": instance_id, + "method": method, + "params": record.params, + "timestamp": timestamp, + }), + ); +} + +async fn update_status( + instances: &Arc>>, + instance_id: &str, + status: BridgeStatus, +) { + let mut instances = instances.write().await; + if let Some(bridge) = instances.get_mut(instance_id) { + bridge.status = status; + } +} diff --git a/crates/pardus-tauri/src-tauri/src/commands.rs b/crates/pardus-tauri/src-tauri/src/commands.rs index dd65647..c99371c 100644 --- a/crates/pardus-tauri/src-tauri/src/commands.rs +++ b/crates/pardus-tauri/src-tauri/src/commands.rs @@ -1,7 +1,8 @@ -use tauri::{AppHandle, Manager, WebviewUrl, WebviewWindowBuilder}; +use tauri::{AppHandle, Emitter, Manager, WebviewUrl, WebviewWindowBuilder}; use serde::Serialize; use crate::AppState; +use crate::cdp_bridge::{CdpEventRecord, BridgeStatus}; // --------------------------------------------------------------------------- // Instance management commands @@ -90,9 +91,9 @@ pub async fn kill_instance( state: tauri::State<'_, AppState>, id: String, ) -> Result<(), String> { + state.cdp_bridge.disconnect(&id).await; let mut instances = state.instances.lock().unwrap(); if let Some(mut inst) = instances.remove(&id) { - // Close browser window if open if let Some(label) = &inst.browser_window_label { if let Some(window) = app.get_webview_window(label) { let _ = window.close(); @@ -110,9 +111,15 @@ pub async fn kill_all_instances( app: AppHandle, state: tauri::State<'_, AppState>, ) -> Result<(), String> { + let 
ids: Vec = { + let instances = state.instances.lock().unwrap(); + instances.keys().cloned().collect() + }; + for id in &ids { + state.cdp_bridge.disconnect(id).await; + } let mut instances = state.instances.lock().unwrap(); for (_, mut inst) in instances.drain() { - // Close browser window if open if let Some(label) = &inst.browser_window_label { if let Some(window) = app.get_webview_window(label) { let _ = window.close(); @@ -257,7 +264,6 @@ pub async fn navigate_browser_window( inst.current_url = Some(url); } - let _ = parsed_url; Ok(()) } @@ -278,3 +284,158 @@ pub async fn close_browser_window( Ok(()) } + +// --------------------------------------------------------------------------- +// CDP bridge commands +// --------------------------------------------------------------------------- + +#[tauri::command] +pub async fn connect_instance( + app: AppHandle, + state: tauri::State<'_, AppState>, + instance_id: String, +) -> Result<(), String> { + let port = { + let instances = state.instances.lock().unwrap(); + instances + .get(&instance_id) + .ok_or_else(|| format!("instance '{}' not found", instance_id))? 
+ .port + }; + + state.cdp_bridge.connect(instance_id.clone(), port, app).await; + + Ok(()) +} + +#[tauri::command] +pub async fn disconnect_instance( + state: tauri::State<'_, AppState>, + instance_id: String, +) -> Result<(), String> { + state.cdp_bridge.disconnect(&instance_id).await; + + let mut instances = state.instances.lock().unwrap(); + if let Some(inst) = instances.get_mut(&instance_id) { + inst.agent_status = "idle".to_string(); + } + + Ok(()) +} + +#[tauri::command] +pub async fn execute_cdp( + state: tauri::State<'_, AppState>, + instance_id: String, + method: String, + params: serde_json::Value, +) -> Result { + let resp = state + .cdp_bridge + .send_command(&instance_id, method, params) + .await?; + + // CDP responses wrap the payload in {"id":N,"result":{...}} — extract inner result + if let Some(result) = resp.get("result").cloned() { + Ok(result) + } else if resp.get("error").is_some() { + Err(resp["error"]["message"].as_str().unwrap_or("CDP error").to_string()) + } else { + Ok(resp) + } +} + +#[tauri::command] +pub async fn get_semantic_tree( + state: tauri::State<'_, AppState>, + instance_id: String, +) -> Result { + let resp = state + .cdp_bridge + .send_command( + &instance_id, + "Pardus.semanticTree".to_string(), + serde_json::json!({}), + ) + .await?; + + // CDP responses wrap the payload in {"id":N,"result":{...}} — extract inner result + if let Some(result) = resp.get("result").cloned() { + Ok(result) + } else if resp.get("error").is_some() { + Err(resp["error"]["message"].as_str().unwrap_or("CDP error").to_string()) + } else { + Ok(resp) + } +} + +#[tauri::command] +pub async fn get_instance_events( + state: tauri::State<'_, AppState>, + instance_id: String, + limit: Option, + since: Option, +) -> Result, String> { + Ok(state + .cdp_bridge + .get_events(&instance_id, limit.unwrap_or(100), since) + .await) +} + +#[tauri::command] +pub async fn get_bridge_status( + state: tauri::State<'_, AppState>, + instance_id: String, +) -> Result { + 
state + .cdp_bridge + .get_status(&instance_id) + .await + .ok_or_else(|| format!("no bridge for instance '{}'", instance_id)) +} + +// --------------------------------------------------------------------------- +// Agent status commands +// --------------------------------------------------------------------------- + +#[tauri::command] +pub async fn set_agent_status( + state: tauri::State<'_, AppState>, + instance_id: String, + status: String, +) -> Result<(), String> { + let valid = [ + "idle", "connected", "running", "paused", + "waiting-challenge", "error", + ]; + if !valid.contains(&status.as_str()) { + return Err(format!( + "invalid status '{}'. expected: {}", + status, + valid.join(", ") + )); + } + + let mut instances = state.instances.lock().unwrap(); + if let Some(inst) = instances.get_mut(&instance_id) { + let old = inst.agent_status.clone(); + inst.agent_status = status.clone(); + + drop(instances); + + if let Some(handle) = state.app_handle.lock().unwrap().as_ref() { + let _ = handle.emit( + "agent-status-changed", + serde_json::json!({ + "instance_id": instance_id, + "old_status": old, + "new_status": status, + }), + ); + } + } else { + return Err(format!("instance '{}' not found", instance_id)); + } + + Ok(()) +} diff --git a/crates/pardus-tauri/src-tauri/src/lib.rs b/crates/pardus-tauri/src-tauri/src/lib.rs index 79d1515..b07278a 100644 --- a/crates/pardus-tauri/src-tauri/src/lib.rs +++ b/crates/pardus-tauri/src-tauri/src/lib.rs @@ -1,4 +1,5 @@ mod browser_window; +mod cdp_bridge; mod challenge; mod commands; mod cookie_bridge; @@ -8,12 +9,14 @@ use std::collections::HashMap; use std::sync::Arc; use std::sync::Mutex; -use tauri::{Listener, Manager}; +use tauri::{Emitter, Listener, Manager}; pub struct AppState { pub instances: Mutex>, pub next_id: Mutex, pub resolver: Mutex>>, + pub cdp_bridge: cdp_bridge::CdpBridge, + pub app_handle: Mutex>, } pub fn run() { @@ -23,6 +26,8 @@ pub fn run() { instances: Mutex::new(HashMap::new()), next_id: 
Mutex::new(1), resolver: Mutex::new(None), + cdp_bridge: cdp_bridge::CdpBridge::new(), + app_handle: Mutex::new(None), }) .invoke_handler(tauri::generate_handler![ commands::list_instances, @@ -35,6 +40,13 @@ pub fn run() { commands::open_browser_window, commands::navigate_browser_window, commands::close_browser_window, + commands::connect_instance, + commands::disconnect_instance, + commands::execute_cdp, + commands::get_semantic_tree, + commands::get_instance_events, + commands::get_bridge_status, + commands::set_agent_status, ]) .setup(|app| { tracing_subscriber::fmt() @@ -45,13 +57,17 @@ pub fn run() { .init(); let app_handle = app.handle().clone(); + + { + let state = app.state::(); + *state.app_handle.lock().unwrap() = Some(app_handle.clone()); + } + let resolver = Arc::new(challenge::TauriChallengeResolver::new(app_handle)); - // Store resolver in state let state = app.state::(); *state.resolver.lock().unwrap() = Some(resolver.clone()); - // Listen for cookie events from challenge webviews let r_cookies = resolver.clone(); app.listen("challenge-cookies", move |event| { let payload = event.payload(); @@ -65,7 +81,6 @@ pub fn run() { } }); - // Listen for timeout events from challenge webviews let r_timeout = resolver.clone(); app.listen("challenge-timeout", move |event| { let payload = event.payload(); @@ -78,7 +93,6 @@ pub fn run() { } }); - // Listen for browser-navigate events from browser window toolbars let nav_handle = app.handle().clone(); app.listen("browser-navigate", move |event| { let payload = event.payload(); @@ -88,12 +102,10 @@ pub fn run() { let h = nav_handle.clone(); tauri::async_runtime::spawn(async move { let label = format!("browser-{}", instance_id); - // Close and reopen with new URL if let Some(window) = h.get_webview_window(&label) { let _ = window.close(); } if let Ok(_new_label) = browser_window::open_browser_window(&h, &instance_id, &url) { - // Update instance state let state = h.state::(); let mut instances = 
state.instances.lock().unwrap(); if let Some(inst) = instances.get_mut(&instance_id) { @@ -104,7 +116,6 @@ pub fn run() { } }); - // Listen for browser-url-changed events to track current URL let url_handle = app.handle().clone(); app.listen("browser-url-changed", move |event| { let payload = event.payload(); @@ -120,6 +131,206 @@ pub fn run() { } }); + app.listen("challenge-detected", { + let h = app.handle().clone(); + move |event| { + let payload = event.payload(); + if let Ok(data) = serde_json::from_str::(payload) { + let challenge_url = data["url"].as_str().unwrap_or("").to_string(); + let h = h.clone(); + tauri::async_runtime::spawn(async move { + let state = h.state::(); + let instances = state.instances.lock().unwrap(); + for (inst_id, inst) in instances.iter() { + if inst.current_url.as_deref() == Some(challenge_url.as_str()) { + let inst_id = inst_id.clone(); + drop(instances); + let state = h.state::(); + let mut instances = state.instances.lock().unwrap(); + if let Some(inst) = instances.get_mut(&inst_id) { + if inst.agent_status != "waiting-challenge" { + inst.agent_status = "waiting-challenge".to_string(); + let _ = h.emit( + "agent-status-changed", + serde_json::json!({ + "instance_id": inst_id, + "old_status": "running", + "new_status": "waiting-challenge", + }), + ); + } + } + return; + } + } + }); + } + } + }); + + app.listen("challenge-solved", { + let h = app.handle().clone(); + move |event| { + let payload = event.payload(); + if let Ok(data) = serde_json::from_str::(payload) { + let solved_url = data["url"].as_str().unwrap_or("").to_string(); + let h = h.clone(); + tauri::async_runtime::spawn(async move { + let state = h.state::(); + let instances = state.instances.lock().unwrap(); + for (inst_id, inst) in instances.iter() { + if inst.agent_status == "waiting-challenge" + && inst.current_url.as_deref() == Some(solved_url.as_str()) + { + let inst_id = inst_id.clone(); + drop(instances); + let state = h.state::(); + let mut instances = 
state.instances.lock().unwrap(); + if let Some(inst) = instances.get_mut(&inst_id) { + inst.agent_status = "running".to_string(); + let _ = h.emit( + "agent-status-changed", + serde_json::json!({ + "instance_id": inst_id, + "old_status": "waiting-challenge", + "new_status": "running", + }), + ); + } + return; + } + } + }); + } + } + }); + + app.listen("challenge-failed", { + let h = app.handle().clone(); + move |event| { + let payload = event.payload(); + if let Ok(data) = serde_json::from_str::(payload) { + let failed_url = data["challenge_url"].as_str().unwrap_or("").to_string(); + let h = h.clone(); + tauri::async_runtime::spawn(async move { + let state = h.state::(); + let instances = state.instances.lock().unwrap(); + for (inst_id, inst) in instances.iter() { + if inst.agent_status == "waiting-challenge" + && inst.current_url.as_deref() == Some(failed_url.as_str()) + { + let inst_id = inst_id.clone(); + drop(instances); + let state = h.state::(); + let mut instances = state.instances.lock().unwrap(); + if let Some(inst) = instances.get_mut(&inst_id) { + inst.agent_status = "error".to_string(); + let _ = h.emit( + "agent-status-changed", + serde_json::json!({ + "instance_id": inst_id, + "old_status": "waiting-challenge", + "new_status": "error", + }), + ); + } + return; + } + } + }); + } + } + }); + + app.listen("cdp-bridge-connected", { + let h = app.handle().clone(); + move |event| { + let payload = event.payload(); + if let Ok(data) = serde_json::from_str::(payload) { + let inst_id = data["instance_id"].as_str().unwrap_or("").to_string(); + let h = h.clone(); + tauri::async_runtime::spawn(async move { + let state = h.state::(); + let mut instances = state.instances.lock().unwrap(); + if let Some(inst) = instances.get_mut(&inst_id) { + if inst.agent_status == "idle" { + inst.agent_status = "connected".to_string(); + let _ = h.emit( + "agent-status-changed", + serde_json::json!({ + "instance_id": inst_id, + "old_status": "idle", + "new_status": "connected", + 
}), + ); + } + } + }); + } + } + }); + + app.listen("cdp-event", { + let h = app.handle().clone(); + move |event| { + let payload = event.payload(); + if let Ok(data) = serde_json::from_str::(payload) { + let instance_id = data["instance_id"].as_str().unwrap_or("").to_string(); + let method = data["method"].as_str().unwrap_or("").to_string(); + + let is_action_event = method.starts_with("Pardus.action"); + let is_navigation = method == "Page.frameNavigated"; + + if !is_action_event && !is_navigation { + return; + } + + if is_navigation { + let has_parent = data["params"]["frame"].get("parentId").is_some(); + if !has_parent { + if let Some(url) = data["params"]["frame"]["url"].as_str() { + let h = h.clone(); + let inst_id = instance_id.clone(); + let url = url.to_string(); + tauri::async_runtime::spawn(async move { + let state = h.state::(); + let mut instances = state.instances.lock().unwrap(); + if let Some(inst) = instances.get_mut(&inst_id) { + inst.current_url = Some(url); + if inst.agent_status == "connected" || inst.agent_status == "idle" { + inst.agent_status = "running".to_string(); + let _ = h.emit( + "agent-status-changed", + serde_json::json!({ + "instance_id": inst_id, + "old_status": inst.agent_status.clone(), + "new_status": "running", + }), + ); + } + } + }); + } + } + } + + if is_action_event && method == "Pardus.actionStarted" { + let h = h.clone(); + let inst_id = instance_id.clone(); + tauri::async_runtime::spawn(async move { + let state = h.state::(); + let mut instances = state.instances.lock().unwrap(); + if let Some(inst) = instances.get_mut(&inst_id) { + if inst.agent_status == "connected" || inst.agent_status == "idle" { + inst.agent_status = "running".to_string(); + } + } + }); + } + } + } + }); + Ok(()) }) .run(tauri::generate_context!()) diff --git a/crates/pardus-tauri/src-tauri/tauri.conf.json b/crates/pardus-tauri/src-tauri/tauri.conf.json index 8e86057..17e752e 100644 --- a/crates/pardus-tauri/src-tauri/tauri.conf.json +++ 
b/crates/pardus-tauri/src-tauri/tauri.conf.json @@ -4,8 +4,10 @@ "version": "0.1.0", "identifier": "ai.pardus.browser", "build": { - "beforeBuildCommand": "npm run build", - "frontendDist": "../dist" + "beforeBuildCommand": "cd ../../crates/pardus-tauri/frontend && npm run build", + "beforeDevCommand": "cd ../../crates/pardus-tauri/frontend && npm run dev", + "devUrl": "http://localhost:1420", + "frontendDist": "../frontend/dist" }, "app": { "windows": [