diff --git a/.env.example b/.env.example index d1134e9..7c0c4c8 100644 --- a/.env.example +++ b/.env.example @@ -28,9 +28,17 @@ LLM_PROVIDER= # ── Agent config ───────────────────────────────────────────────────────────── AGENT_TIMEZONE=America/Chicago -# ── Optional: Bluesky credentials for post-to-site ─────────────────────────── -# Leave empty to disable Bluesky posting end-to-end (post-to-site will respond -# "Credential '…' is not configured"). Use a Bluesky app password from +# ── Optional: Foragent embeddings (for skill + memory semantic retrieval) ──── +# Separate from FORAGENT_LLM_* so embeddings can live on a different Azure +# Foundry subscription or deployment. If any are left empty, Foragent falls +# back to BM25-only retrieval (logged once at startup). +FORAGENT_EMBEDDINGS_ENDPOINT= +FORAGENT_EMBEDDINGS_MODEL_ID=text-embedding-3-small +FORAGENT_EMBEDDINGS_API_KEY= + +# ── Optional: Bluesky credentials ──────────────────────────────────────────── +# The generalist browser-task can use credentials by id (once a credentialed +# login tool lands in a later step). Use a Bluesky app password from # https://bsky.app/settings/app-passwords, never your account password. # Stored only in-process by Foragent's InMemoryCredentialBroker — never logged, # never sent over A2A. For prod, swap in a k8s-secrets or vault broker. diff --git a/CLAUDE.md b/CLAUDE.md index c34ce47..a5f2a9b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Status -Foragent is at **milestone 6 shipped, step 7 next**. Four capabilities are live (`browser-task`, `fetch-page-title`, `extract-structured-data`, `post-to-site`); the A2A loop is wired end-to-end against RockBot via the `docker-compose.yml` harness pinned to `rockylhotka/rockbot-agent:0.8.5`. 
Step 6 shipped the generalist `browser-task` planner (LLM-in-the-loop over ref-annotated aria snapshots + `aria-ref=eN` locator resolution, built on `Microsoft.Playwright` 1.59 — bumped from 1.50 for the Ai aria-snapshot mode; see Appendix A #16). Tiered chat clients are wired via `AddRockBotTieredChatClients` with one model aliased across Low/Balanced/High per spec §3.7. The governing spec is `docs/foragent-specification.md` **v0.2**. Step 7 wires `ISkillStore` + `ILongTermMemory` priming; `post-to-site` is removed from the advertised skill list once `browser-task` + the learned bsky skill cover it. Storage-state persistence, 2FA input-required flow, k8s-secrets broker, and per-tenant credential namespaces remain deferred — tracked in `docs/framework-feedback.md`. Framework-level observations from each milestone are captured in `docs/framework-feedback.md`. +Foragent is at **milestone 7 shipped, step 8 next**. Three capabilities are advertised (`browser-task`, `fetch-page-title`, `extract-structured-data`); the A2A loop is wired end-to-end against RockBot via the `docker-compose.yml` harness pinned to `rockylhotka/rockbot-agent:0.8.5`. Step 6 shipped the generalist `browser-task` planner (LLM-in-the-loop over ref-annotated aria snapshots + `aria-ref=eN` locator resolution, built on `Microsoft.Playwright` 1.59 — bumped from 1.50 for the Ai aria-snapshot mode; see Appendix A #16). Tiered chat clients are wired via `AddRockBotTieredChatClients` with one model aliased across Low/Balanced/High per spec §3.7. Step 7 wired the learning substrate: `ISkillStore` + `ILongTermMemory` via `WithSkills()` + `WithLongTermMemory()`, `BrowserTaskPriming` injects retrieved skill + memory content into the planner prompt, successful tasks write a learned skill at `sites/{host}/learned/{slug}`, and `BskySeedSkillService` seeds `sites/bsky.app/login` on first start (idempotent — only writes when absent). 
Embeddings are optional and configured separately under `ForagentEmbeddings` so they can live on a different Azure Foundry subscription than the chat model; missing embeddings downgrade retrieval to BM25-only with a single startup warning. The step-6 unaided benchmark (3/3) still passes after the priming wiring. `post-to-site` has been removed from both the advertised skill list and the codebase (greenfield deletion — `browser-task` + the learned bsky skill cover the use case). The governing spec is `docs/foragent-specification.md` **v0.2**. Storage-state persistence, 2FA input-required flow, k8s-secrets broker, and per-tenant credential namespaces remain deferred — tracked in `docs/framework-feedback.md`. Framework-level observations from each milestone are captured in `docs/framework-feedback.md`. ## Build / test @@ -47,7 +47,7 @@ Four library/host projects with a strict layering: ``` Foragent.Agent (executable, A2A server host, DI composition root) - ├─ Foragent.Capabilities (task-level verbs: fetch-page-content, post-to-site, …) + ├─ Foragent.Capabilities (task-level verbs: browser-task, fetch-page-title, …) │ └─ Foragent.Browser (Playwright wrapper; owns browser + per-task BrowserContext) └─ Foragent.Credentials (ICredentialBroker abstraction + built-in brokers) ``` @@ -68,6 +68,7 @@ Key framework pieces Foragent uses today: - `RockBot.Host.AddRockBotHost` + `AgentHostBuilder.AddA2A` — bus-side agent registration. Subscribes to `agent.task.{agentName}` on RabbitMQ. - `RockBot.A2A.IAgentTaskHandler` — the single per-agent extension point. `ForagentTaskHandler` (in `Foragent.Capabilities`) implements this and dispatches on `request.Skill`. - `RockBot.A2A.Gateway.AddA2AHttpGateway` + `MapA2AHttpGateway` — the in-process HTTP surface. Published as NuGet in RockBot 0.8.4 (see `docs/framework-feedback.md`). +- `RockBot.Host.AgentMemoryExtensions.WithSkills` / `WithLongTermMemory` — file-backed `ISkillStore` + `ILongTermMemory` (step 7). 
`ISkillStore.SearchAsync` takes an explicit `float[]? queryEmbedding`; callers compute the embedding. `Skill` record is lean (`Name, Summary, Content, CreatedAt, UpdatedAt?, LastUsedAt?, SeeAlso`) — no tags or importance field. Foragent requires an LLM. Config lives under `ForagentLlm` — separate from any rockbot-side `LLM` config so the two agents can point at different models. Program.cs fails fast at startup if `ForagentLlm:Endpoint`/`ModelId`/`ApiKey` are missing. Starting step 6 the single configured model is wired via `AddRockBotTieredChatClients(low, balanced, high)` aliased to the same inner `IChatClient`; that one call registers both `IChatClient` (wrapped with `RockBotFunctionInvokingChatClient` for automatic tool invocation) and `TieredChatClientRegistry` (per spec §3.7). Don't also call `AddRockBotChatClient` — it would swap out the wrapped registration. Capabilities that want to escalate/de-escalate per request can resolve `TieredChatClientRegistry` and call `GetClient(ModelTier.Low|Balanced|High)`; none do today. @@ -75,7 +76,7 @@ Foragent requires an LLM. Config lives under `ForagentLlm` — separate from any `Foragent.Browser` wraps Playwright. `AddForagentBrowser()` in `Foragent.Agent/Program.cs` registers `PlaywrightBrowserHost` (`IHostedService` owning one shared Chromium per process) and `IBrowserSessionFactory` (hands out a fresh `IBrowserContext` per A2A task — isolation guarantee from spec §3.5). `IBrowserSession` exposes `FetchPageTitleAsync` / `CapturePageSnapshotAsync` for one-shot reads, `OpenPageAsync` → `IBrowserPage` (navigate / fill / click / wait / read) for multi-step flows like login + post, and `OpenAgentPageAsync` → `IBrowserAgentPage` for LLM-in-the-loop planners (ref-annotated aria snapshots + `aria-ref=eN` locator resolution). The snapshot uses Chromium's aria-snapshot (via `Locator.AriaSnapshotAsync`; `Mode = AriaSnapshotMode.Ai` gets the ref-annotated form) and falls back to `<body>` inner text when the tree is empty. 
Selectors passed to `IBrowserPage` use Playwright's string-selector dialect (CSS + `role=role[name="..."]`); **regex is not accepted in string form**, use exact attribute matches. `Foragent.Browser` has `InternalsVisibleTo("Foragent.Browser.Tests")` so tests drive the real `PlaywrightBrowserSessionFactory` without promoting its implementation types to public. -`CreateSessionAsync(Func allowedHost, ...)` is the step-6 entry point for allowlist-scoped sessions. The factory installs a context-wide `RouteAsync("**/*", ...)` that aborts off-list document/subframe navigations before Playwright issues the request (spec §7.1). The no-argument overload accepts any host and stays available for specialists that enforce narrower rules elsewhere (e.g. `post-to-site` where the site id selects the host). +`CreateSessionAsync(Func allowedHost, ...)` is the step-6 entry point for allowlist-scoped sessions. The factory installs a context-wide `RouteAsync("**/*", ...)` that aborts off-list document/subframe navigations before Playwright issues the request (spec §7.1). The no-argument overload accepts any host and stays available for specialists that enforce narrower rules elsewhere. ## Capabilities @@ -84,10 +85,26 @@ Foragent requires an LLM. Config lives under `ForagentLlm` — separate from any - Each capability implements `ICapability` — owns its own `AgentSkill` metadata (exposed as a static `SkillDefinition`) and its own `ExecuteAsync` logic. - `ForagentTaskHandler` is a pure dispatcher that resolves `IEnumerable<ICapability>` from DI and routes on `SkillId`. **Do not add skill-specific logic to the handler.** New capabilities go in new `ICapability` classes. - `ForagentCapabilities.Skills` (static array) is the single source of truth for advertised skills — both the bus-side `AgentCard.Skills` and the HTTP gateway's `opts.Skills` read from it. -- `CapabilityInput.Parse` is the shared URL + description shim used by `fetch-page-title` and `extract-structured-data`. 
Capabilities with different input shapes (e.g. `post-to-site` needing `site` / `credentialId` / `content`) parse their own input near the capability — see `PostToSiteInput` in `PostToSiteCapability.cs`. Don't overload `CapabilityInput` for unrelated shapes. -- `post-to-site` dispatches to an `ISitePoster` keyed on `Site` (in `SitePosting/`). `BlueskySitePoster` is the only implementation today; add new sites by registering another `ISitePoster` in `AddForagentCapabilities()`. The capability never echoes exception messages from posters back to callers — they may contain credential material; operators read the full exception in logs. +- `CapabilityInput.Parse` is the shared URL + description shim used by `fetch-page-title` and `extract-structured-data`. Capabilities with different input shapes parse their own input near the capability (e.g. `BrowserTaskInput` in `BrowserTask/`). Don't overload `CapabilityInput` for unrelated shapes. - `browser-task` (in `BrowserTask/`) is the generalist planner (spec §5.2). `BrowserTaskInput` parses intent + mandatory `allowedHosts` + optional `url` / `credentialId` / `maxSteps` (default 60, ceiling 150) / `maxSeconds` (default 120, ceiling 600). `BrowserTaskTools` wraps `snapshot` / `navigate` / `click` / `type` / `wait_for` / `done` / `fail` as `AIFunction`s via `AIFunctionFactory.Create` and passes them in `ChatOptions.Tools`; the RockBot-wrapped function-invoking `IChatClient` runs the full model ↔ tool loop inside one `GetResponseAsync` call. Budget is enforced tool-side (each tool checks `BrowserTaskState.BudgetExhausted`) because Microsoft.Extensions.AI does not surface per-request iteration caps through `ChatOptions`; wall-clock is a linked `CancellationTokenSource`. **Never log tool arguments verbatim** — `type` carries user-supplied values that may be sensitive (log length only). 
Refs from a snapshot are valid only until the next mutating call; the system prompt and tool descriptions both state this, but don't code anything that assumes cross-snapshot ref stability. +## Learning substrate (step 7) + +Two RockBot framework stores are wired into the host via `AgentHostBuilder`: + +- `ISkillStore` (`agent.WithSkills(opts => opts.BasePath = …)`) — file-backed skill store for markdown site primers. Content root is read from `ForagentMemory:SkillsPath`, defaulting to `data/skills`. +- `ILongTermMemory` (`agent.WithLongTermMemory(opts => opts.BasePath = …)`) — file-backed memory for declarative observations. Content root is read from `ForagentMemory:MemoryPath`, defaulting to `data/memory`. + +`BrowserTaskPriming` (DI-scoped) runs before each `browser-task` planner call: it derives a query from intent + primary allowlist host, optionally computes an embedding via `IEmbeddingGenerator<string, Embedding<float>>`, and calls `ISkillStore.SearchAsync` + `ILongTermMemory.SearchAsync` in parallel. Retrieved content is injected as a "Known site knowledge" section in the user prompt. Fail-soft: either store throwing is logged at debug and skipped, so a broken priming path never fails a task. + +Embeddings are optional. `ForagentEmbeddings:Endpoint` / `ModelId` / `ApiKey` are all-or-nothing; missing any one drops back to BM25-only with a startup warning. The embeddings config is a separate section from `ForagentLlm` because the user's embeddings deployment lives on a different subscription than the chat deployment — keep them split. + +On successful completion (`state.IsDone`), `BrowserTaskCapability.TryWriteLearnedSkillAsync` runs one extra synthesizer LLM turn (same `IChatClient`, no tools) to author a reusable skill at `sites/{primaryHost}/learned/{intent-slug}`. The synthesizer prompt forbids including credential values or typed field contents. Writes are skipped when the task was trivial (≤1 navigation) or the primary host can't be determined. Errors are logged but never fail the completed task. 
+ +`BskySeedSkillService` (IHostedService) seeds `sites/bsky.app/login` on first start by calling `ISkillStore.GetAsync` and only writing if absent — docker volume recreation reseeds cleanly; operator edits to the skill through other channels are preserved. + +Skill naming follows spec §5.6: `sites/{host}/{intent}` for human-authored primers, `sites/{host}/learned/{slug}` for agent-generated. `Skill.SeeAlso` cross-references related skills to surface clusters rather than single entries. **Note:** `Skill` (from `RockBot.Host 0.8.5`) does not carry tags, metadata, or importance — the `agent-learned` distinction is encoded in the name prefix only. + ## Credentials `Foragent.Credentials` ships `ICredentialBroker` + `CredentialReference(Id, Kind, Values)`. `AddForagentCredentials(configuration, "Credentials")` wires an `InMemoryCredentialBroker` bound to the config section — dev/test only per spec §6.3. Populate via user-secrets (`dotnet user-secrets set "Credentials:bluesky-rocky:Kind" username-password`, etc.), never appsettings.json. **Never log `CredentialReference.Values`**, never include them in A2A responses, never embed them in exception messages. `CredentialReference.ToString()` deliberately does not expose values. Missing credentials throw `CredentialNotFoundException` carrying only the id. 
diff --git a/deploy/rockbot-seed/agent-trust.json b/deploy/rockbot-seed/agent-trust.json index 10ffabe..05a69b7 100644 --- a/deploy/rockbot-seed/agent-trust.json +++ b/deploy/rockbot-seed/agent-trust.json @@ -2,7 +2,7 @@ { "agentId": "Foragent", "level": 4, - "approvedSkills": ["browser-task", "fetch-page-title", "extract-structured-data", "post-to-site"], + "approvedSkills": ["browser-task", "fetch-page-title", "extract-structured-data"], "firstSeen": "2026-04-21T00:00:00+00:00", "lastInteraction": "2026-04-21T00:00:00+00:00", "interactionCount": 0 diff --git a/deploy/rockbot-seed/well-known-agents.json b/deploy/rockbot-seed/well-known-agents.json index 35bcd03..c973a20 100644 --- a/deploy/rockbot-seed/well-known-agents.json +++ b/deploy/rockbot-seed/well-known-agents.json @@ -22,11 +22,6 @@ "id": "extract-structured-data", "name": "Extract Structured Data", "description": "Navigate to a URL and extract data matching a natural-language description, returning JSON. Input the target URL and a description of what to extract." - }, - { - "id": "post-to-site", - "name": "Post to Site", - "description": "Authenticate against a configured site (by credential identifier) and publish a post. Input JSON {\"site\":\"bluesky\",\"credentialId\":\"...\",\"content\":\"...\"} or message metadata fields site / credentialId / content. Credential values never cross the A2A boundary." 
} ] } diff --git a/docker-compose.yml b/docker-compose.yml index e7a5ac9..2f0b7e1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -61,7 +61,7 @@ services: RabbitMq__VirtualHost: / Gateway__AgentName: Foragent Gateway__InternalAgentName: Foragent - Gateway__Description: "Browser agent — browser-task (generalist), fetch-page-title, extract-structured-data, post-to-site" + Gateway__Description: "Browser agent — browser-task (generalist), fetch-page-title, extract-structured-data" # RockBot will call Foragent with header X-Api-Key: rockbot-calls-foragent ApiKeys__rockbot-calls-foragent__AgentId: RockBot ApiKeys__rockbot-calls-foragent__DisplayName: RockBot @@ -70,15 +70,25 @@ services: ForagentLlm__Endpoint: ${FORAGENT_LLM_ENDPOINT:?FORAGENT_LLM_ENDPOINT is required} ForagentLlm__ModelId: ${FORAGENT_LLM_MODEL_ID:?FORAGENT_LLM_MODEL_ID is required} ForagentLlm__ApiKey: ${FORAGENT_LLM_API_KEY:?FORAGENT_LLM_API_KEY is required} - # Optional Bluesky credential for post-to-site. Callers invoke post-to-site - # with credentialId: "bluesky-rocky". Flat id (no slashes) because env-var - # keys use __ to separate config path segments — ids with slashes work via - # appsettings / user-secrets but not via env vars. Leave unset to disable; - # post-to-site will report "Credential '…' is not configured." - # For prod, replace InMemoryCredentialBroker with k8s-secrets. + # Optional embeddings. Empty values (default) → BM25-only skill + memory + # retrieval. Set any one of Endpoint/ModelId/ApiKey empty and the others + # are ignored (all-or-nothing at startup). Separate subscription from + # ForagentLlm because embedding deployments often live elsewhere. + ForagentEmbeddings__Endpoint: ${FORAGENT_EMBEDDINGS_ENDPOINT:-} + ForagentEmbeddings__ModelId: ${FORAGENT_EMBEDDINGS_MODEL_ID:-} + ForagentEmbeddings__ApiKey: ${FORAGENT_EMBEDDINGS_API_KEY:-} + # Step 7: skills + long-term memory (spec §5.6). 
Paths align with the + # mounted volume below so learned site knowledge survives restarts. + ForagentMemory__SkillsPath: /data/foragent/skills + ForagentMemory__MemoryPath: /data/foragent/memory + # Optional Bluesky credential used by future credentialed browser-task + # runs. Flat id (no slashes) because env-var keys use __ to separate + # config segments. Leave unset to disable. Credentials__bluesky-rocky__Kind: username-password Credentials__bluesky-rocky__Values__identifier: ${FORAGENT_BLUESKY_IDENTIFIER:-} Credentials__bluesky-rocky__Values__password: ${FORAGENT_BLUESKY_APP_PASSWORD:-} + volumes: + - foragent-data:/data/foragent rockbot-init: image: rockylhotka/rockbot-agent:0.8.5 @@ -177,3 +187,4 @@ services: volumes: rockbot-data: rockbot-shared: + foragent-data: diff --git a/docs/capabilities.md b/docs/capabilities.md index 31bd7c4..e8ac2e3 100644 --- a/docs/capabilities.md +++ b/docs/capabilities.md @@ -7,12 +7,14 @@ invoke capabilities by name; Foragent handles the browser mechanics. - `browser-task` — **generalist**, spec §5.2. LLM-in-the-loop planner that drives a real browser to accomplish a free-form intent. Shipped in - step 6. + step 6; step 7 added skills + memory priming (spec §5.6). - `fetch-page-title` — specialist. Inherited from step 1/2. - `extract-structured-data` — specialist. Inherited from step 3. -- `post-to-site` — specialist, credential-using. Inherited from step 4. - Scheduled for removal from the advertised list once step 7 lands - (`browser-task` + learned bsky skill subsume it). + +The step-4 `post-to-site` capability was removed in step 7 — the +generalist `browser-task` plus the seeded `sites/bsky.app/login` skill +subsume its function, and the project is still pre-public so no consumer +needed a deprecation path. ## `browser-task` input shape @@ -35,8 +37,8 @@ JSON in the first text part, or field-by-field metadata: unrestricted. Off-list navigations are aborted inside the browser context before Playwright issues the request. 
- `url` — optional. If provided, must match the allowlist. -- `credentialId` — optional. Resolved but not exposed to the planner in - step 6; reserved for a typed login tool in a later step. +- `credentialId` — optional. Resolved but not exposed to the planner + yet; reserved for a typed login tool in a later step. - `maxSteps` — default 60, ceiling 150. Enforced tool-side via `BrowserTaskState.BudgetExhausted`; once exceeded, tools return a "call done or fail" message and refuse further work. diff --git a/docs/framework-feedback.md b/docs/framework-feedback.md index c02a69d..dcc86a8 100644 --- a/docs/framework-feedback.md +++ b/docs/framework-feedback.md @@ -339,3 +339,73 @@ the planner prompt, tool surface, or model pin changes. - **`TieredChatClientRegistry.GetClient(ModelTier.Low/High)` is wired but no capability resolves it yet.** All three tiers currently alias to the same model. Tier-aware capability code lands as models diverge. + +## Step 7 — skills + memory priming + +### Framework observations + +- **`Skill` record has no tags, metadata, or importance field.** The + 0.8.5 shape is `(Name, Summary, Content, CreatedAt, UpdatedAt?, + LastUsedAt?, SeeAlso)`. The "agent-learned vs human-authored" + distinction Foragent needs (and spec §5.6 calls out) has no first-class + slot — today it's encoded in the name prefix (`sites/{host}/learned/…` + vs `sites/{host}/…`). `ILongTermMemory`'s `MemoryEntry` by contrast + carries `Category`, `Tags`, `Metadata`, and `ImportanceScore`. Skills + would benefit from at least `Metadata` parity: agent-learned skills + want a `confidence` score, a `last-verified` timestamp, and a + `source` tag so the planner can weight them below operator primers. + Framework candidate: add `IReadOnlyDictionary? + Metadata` on `Skill` without changing the file-backed format's + tolerance of older shapes. + +- **`ISkillStore.SearchAsync` takes an explicit `float[]? 
+ queryEmbedding`.** Callers compute the embedding (via + `IEmbeddingGenerator<string, Embedding<float>>` from DI). This is + the right shape — it lets consumers cache embeddings across stores + and pick when to spend the embedding call — but it means the store + can't do any "cheap query → skip embedding" optimisation on its own. + Fine for Foragent's usage; worth noting for anyone expecting the + RockBot agent's auto-recall pattern (where the framework does the + embedding behind the scenes) to carry over. + +- **No tests-side mock / in-memory `ISkillStore`.** Foragent's tests + ship a 12-line `FakeSkillStore` that implements the interface by + hand. Framework candidate: a `RockBot.Host.Testing` package with + in-memory implementations of the persistence stores would let + downstream agents write tests without re-implementing the bag of + interfaces. Low-priority but trivial to produce. + +- **Extension methods hang off `AgentHostBuilder`, not + `IServiceCollection`.** `WithSkills` / `WithLongTermMemory` must be + called inside the `AddRockBotHost(agent => …)` callback; calling + them outside on `builder.Services` isn't possible. This is fine — it + enforces the "owned by the agent host" model — but the naming + convention (`With…` on a builder vs `Add…` on services) took a moment + to discover. No ask; noting for consistency. + +- **Embedding generator integration is implicit.** Register an + `IEmbeddingGenerator<string, Embedding<float>>` in DI; `FileSkillStore` + and `FileMemoryStore` presumably pick it up for vector persistence on + `SaveAsync`. There's no explicit handshake in the extension method + signatures (`WithSkills(opts)` doesn't take an + `IEmbeddingGenerator` or a `UseEmbeddings(bool)`). Works, but an + explicit `opts.UseEmbeddings = true/auto/off` would make the behavior + discoverable from `SkillOptions` alone. 
+ +### Config and operator-facing shape + +- **Split embedding config from chat config.** Foragent ships two + separate config sections — `ForagentLlm` and `ForagentEmbeddings` — + because in practice embedding deployments live on different Azure AI + Foundry subscriptions than the chat deployment. If RockBot's own + `EmbeddingOptions` (under `Embedding:*`) later wants to grow this + flexibility, the Foragent layout is a reasonable reference shape. + +### Unaided floor regression check (2026-04-22) + +Re-ran the step-6 benchmark with priming wired in but stores empty +(NoopSkillStore / NoopLongTermMemory in the integration harness). All 3 +scenarios still pass on first attempt — the priming wiring itself adds +no overhead when the stores return nothing, confirming the fail-soft +contract. A separate benchmark with a populated store is step-8-or-later +work (need a curated skill set worth priming against). diff --git a/src/Foragent.Agent/BskySeedSkillService.cs b/src/Foragent.Agent/BskySeedSkillService.cs new file mode 100644 index 0000000..4b2947c --- /dev/null +++ b/src/Foragent.Agent/BskySeedSkillService.cs @@ -0,0 +1,119 @@ +using Microsoft.Extensions.Hosting; +using Microsoft.Extensions.Logging; +using RockBot.Host; + +namespace Foragent.Agent; + +/// +/// Seeds the operator-authored <c>sites/bsky.app/login</c> skill into the +/// configured <see cref="ISkillStore"/> on startup if it is not already +/// present. Idempotent — the skill is written only when absent, so a +/// recreated persistence volume is reseeded on the next start, and edits made +/// to the stored skill through another channel are preserved. +/// +/// Human-authored skills seed the generalist planner with site-specific +/// primers (spec §5.6). Agent-learned skills are written by the capability +/// itself on successful task completion. 
+/// +internal sealed class BskySeedSkillService( + ISkillStore skillStore, + ILogger logger) : IHostedService +{ + private const string SkillName = "sites/bsky.app/login"; + + private const string SkillSummary = + "Log in to bsky.app with an app password; look for 2FA challenges."; + + private const string SkillContent = """ + # Logging in to bsky.app + + Bluesky's public web app is at `https://bsky.app`. The login flow is + straightforward but has a few gotchas worth knowing before you plan + tool calls. + + ## App passwords, not account passwords + + Always authenticate with an **app password** (generated by the user + at `https://bsky.app/settings/app-passwords`), not the account + password. Account passwords trigger the stronger 2FA flow much more + often, and the caller will have supplied an app-password credential + if they intend you to log in. + + ## Login form + + The sign-in screen is reachable from the home page "Sign in" button + or directly at `https://bsky.app/` when signed out. The form is a + single page with two visible inputs: + + - Identifier — accepts either the handle (`rocky.bsky.social`) or a + custom-domain handle (`rocky.lhotka.net`). **Not** the email. + - Password — plain password field; paste the app password here. + + Submit the form by clicking the button labelled "Next" (then + "Sign in" on the second screen once the identifier is validated). + + ## 2FA / email-code challenges + + Even with an app password, bsky occasionally prompts for an emailed + 6-digit code when signing in from a fresh context. If you see a + screen asking for a code, stop and call `fail` — 2FA is handled by + the caller, not by the planner. A future capability may surface + this as an A2A `input-required` state. + + ## Composing a post after login + + The compose button is the large plus-icon floating action button on + the home feed, or `https://bsky.app/compose`. 
The editor is a + ProseMirror `contenteditable` — plain-text typing works; avoid + pasting rich content through the tool surface. + + ## Known failure modes + + - **Rate limit / "try again later".** Space retries out; don't + thrash on failed submits. + - **Handle vs email.** Entering the email in the identifier field + silently fails — re-snapshot and look for a validation message + before assuming it worked. + - **Stale cookie banner.** A one-time cookie banner overlays the + sign-in button on first visit; accept it (click the "OK" or + "Accept" button) before trying to click "Sign in". + + ## See also + + - `sites/bsky.app/compose-post` once that skill is learned. + """; + + public async Task StartAsync(CancellationToken cancellationToken) + { + try + { + var existing = await skillStore.GetAsync(SkillName); + if (existing is not null) + { + logger.LogDebug("Seed skill '{Name}' already present; leaving existing content untouched.", SkillName); + return; + } + + var skill = new Skill( + Name: SkillName, + Summary: SkillSummary, + Content: SkillContent, + CreatedAt: DateTimeOffset.UtcNow, + UpdatedAt: null, + LastUsedAt: null, + SeeAlso: ["sites/bsky.app/compose-post"]); + + await skillStore.SaveAsync(skill); + logger.LogInformation("Seeded human-authored skill '{Name}'.", SkillName); + } + catch (Exception ex) + { + // Seeding is a convenience — never fail the host because a seed + // skill couldn't be written (the capability will still work + // without it, it just loses the priming head start). 
+ logger.LogWarning(ex, "Failed to seed skill '{Name}'.", SkillName); + } + } + + public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask; +} diff --git a/src/Foragent.Agent/Foragent.Agent.csproj b/src/Foragent.Agent/Foragent.Agent.csproj index 1072622..42339da 100644 --- a/src/Foragent.Agent/Foragent.Agent.csproj +++ b/src/Foragent.Agent/Foragent.Agent.csproj @@ -2,6 +2,9 @@ foragent-agent + + + diff --git a/src/Foragent.Agent/Program.cs b/src/Foragent.Agent/Program.cs index b6ca3f9..db6412c 100644 --- a/src/Foragent.Agent/Program.cs +++ b/src/Foragent.Agent/Program.cs @@ -1,4 +1,5 @@ using System.ClientModel; +using Foragent.Agent; using Foragent.Browser; using Foragent.Capabilities; using Foragent.Credentials; @@ -32,6 +33,27 @@ new OpenAIClientOptions { Endpoint = new Uri(llmEndpoint) }); var foragentChatClient = openAiClient.GetChatClient(llmModelId).AsIChatClient(); +// ── Embeddings (optional, spec §5.6). Separate config section because +// embedding deployments often live on a different Azure AI Foundry +// subscription than chat completions. Missing config is logged once and +// downgrades skill/memory retrieval to BM25-only. ──────────────────────── +var embedSection = builder.Configuration.GetSection("ForagentEmbeddings"); +var embedEndpoint = embedSection["Endpoint"]; +var embedModelId = embedSection["ModelId"]; +var embedApiKey = embedSection["ApiKey"]; +var embeddingsConfigured = !string.IsNullOrWhiteSpace(embedEndpoint) + && !string.IsNullOrWhiteSpace(embedModelId) + && !string.IsNullOrWhiteSpace(embedApiKey); +if (embeddingsConfigured) +{ + var embeddingClient = new OpenAIClient( + new ApiKeyCredential(embedApiKey!), + new OpenAIClientOptions { Endpoint = new Uri(embedEndpoint!) }) + .GetEmbeddingClient(embedModelId!) 
+ .AsIEmbeddingGenerator(); + builder.Services.AddSingleton(embeddingClient); +} + // ── Messaging (RabbitMQ) ───────────────────────────────────────────────────── builder.Services.AddRockBotRabbitMq(opts => @@ -54,6 +76,13 @@ var gatewaySection = builder.Configuration.GetSection("Gateway"); var agentName = gatewaySection["InternalAgentName"] ?? gatewaySection["AgentName"] ?? "Foragent"; +// Skill + long-term memory paths. File-backed stores from RockBot.Host; both +// directories are created on first write. docker-compose mounts a named volume +// at these paths so learned site knowledge survives container restarts. +var memorySection = builder.Configuration.GetSection("ForagentMemory"); +var skillsPath = memorySection["SkillsPath"] ?? "data/skills"; +var memoryPath = memorySection["MemoryPath"] ?? "data/memory"; + builder.Services.AddRockBotHost(agent => { agent.WithIdentity(agentName); @@ -70,7 +99,15 @@ }; }); + // Step 7: skills + long-term memory as the learning substrate (spec §5.6). + // FileSkillStore / FileMemoryStore pick up an IEmbeddingGenerator from DI + // when registered (see ForagentEmbeddings above); otherwise they fall back + // to BM25 retrieval. + agent.WithSkills(opts => opts.BasePath = skillsPath); + agent.WithLongTermMemory(opts => opts.BasePath = memoryPath); + agent.Services.AddForagentCapabilities(); + agent.Services.AddHostedService(); }); builder.Services.AddForagentBrowser(); @@ -107,6 +144,19 @@ "Foragent starting — HTTP A2A on {Urls}, bus identity '{Identity}'", string.Join(", ", app.Urls.DefaultIfEmpty("(default)")), agentName); +if (embeddingsConfigured) +{ + app.Logger.LogInformation( + "ForagentEmbeddings configured ({ModelId}); skill + memory retrieval will use hybrid BM25 + vector.", + embedModelId); +} +else +{ + app.Logger.LogWarning( + "ForagentEmbeddings not configured — skill + memory retrieval will use BM25 only. 
" + + "Set ForagentEmbeddings:Endpoint/ModelId/ApiKey to enable semantic retrieval."); +} + app.Run(); public partial class Program; diff --git a/src/Foragent.Capabilities/BrowserTask/BrowserTaskCapability.cs b/src/Foragent.Capabilities/BrowserTask/BrowserTaskCapability.cs index 55feadf..c1ca8a9 100644 --- a/src/Foragent.Capabilities/BrowserTask/BrowserTaskCapability.cs +++ b/src/Foragent.Capabilities/BrowserTask/BrowserTaskCapability.cs @@ -5,6 +5,7 @@ using Microsoft.Extensions.AI; using Microsoft.Extensions.Logging; using RockBot.A2A; +using RockBot.Host; namespace Foragent.Capabilities.BrowserTask; @@ -15,15 +16,17 @@ namespace Foragent.Capabilities.BrowserTask; /// capability — specialists exist only where deterministic, programmatic /// callers benefit from a typed shape. /// -/// v0.2 step 6 scope: no learning substrate, no credentials injection into -/// tools (credential id is acknowledged but unused beyond audit logging — -/// step 7 wires ISkillStore + ILongTermMemory priming, later -/// steps expose credentials to the planner through a typed tool). +/// Step 7 wires the learning substrate: + +/// provide priming on plan, and successful +/// tasks write a learned skill back so subsequent runs against the same +/// site benefit from accumulated knowledge (spec §5.6). 
/// public sealed class BrowserTaskCapability( IBrowserSessionFactory browserFactory, IChatClient chatClient, ICredentialBroker credentialBroker, + BrowserTaskPriming priming, + ISkillStore skillStore, ILogger logger) : ICapability { public static AgentSkill SkillDefinition { get; } = new() @@ -98,10 +101,11 @@ public async Task ExecuteAsync(AgentTaskRequest request, AgentT state.MaxSteps = input.MaxSteps; var tools = new BrowserTaskTools(page, state, input.Allowlist!, logger).BuildFunctions(); + var primingText = await priming.BuildAsync(input.Intent!, input.Allowlist!, linkedCts.Token); var messages = new List { new(ChatRole.System, SystemPrompt), - new(ChatRole.User, BuildUserPrompt(input)) + new(ChatRole.User, BuildUserPrompt(input, primingText)) }; var options = new ChatOptions @@ -130,6 +134,9 @@ public async Task ExecuteAsync(AgentTaskRequest request, AgentT input.MaxSeconds, state.Steps); } + if (state.IsDone) + await TryWriteLearnedSkillAsync(input, state, linkedCts.Token); + return BuildResult(request, input, state); } catch (OperationCanceledException) when (ct.IsCancellationRequested) @@ -143,7 +150,7 @@ public async Task ExecuteAsync(AgentTaskRequest request, AgentT } } - private static string BuildUserPrompt(BrowserTaskInput input) + private static string BuildUserPrompt(BrowserTaskInput input, string? primingText) { var sb = new StringBuilder(); sb.Append("Intent: ").AppendLine(input.Intent); @@ -153,9 +160,144 @@ private static string BuildUserPrompt(BrowserTaskInput input) sb.Append("Step budget: ").Append(input.MaxSteps).Append(" steps / ").Append(input.MaxSeconds).AppendLine("s wall-clock."); if (!string.IsNullOrWhiteSpace(input.CredentialId)) sb.AppendLine("A credential id was provided but is not yet exposed as a tool. 
If authentication is required, call fail()."); + if (!string.IsNullOrWhiteSpace(primingText)) + { + sb.AppendLine(); + sb.Append(primingText); + } + return sb.ToString(); + } + + private async Task TryWriteLearnedSkillAsync( + BrowserTaskInput input, + BrowserTaskState state, + CancellationToken ct) + { + // Only write when we have something substantive to remember. A task + // that completed in one navigation carries no multi-step knowledge + // worth priming future runs with. + if (state.Navigations.Count < 2) + return; + + var primaryHost = input.Allowlist!.Patterns + .Select(p => p.TrimStart('*', '.')) + .FirstOrDefault(p => !string.IsNullOrEmpty(p) && p != "*"); + if (string.IsNullOrEmpty(primaryHost)) + return; + + var slug = Slugify(input.Intent!); + var name = $"sites/{primaryHost}/learned/{slug}"; + + try + { + var existing = await skillStore.GetAsync(name); + var synthesizerMessages = new List + { + new(ChatRole.System, LearnedSkillSystemPrompt), + new(ChatRole.User, BuildLearnedSkillPrompt(input, state, existing)) + }; + // No tools on this turn — the model is just summarising. + var response = await chatClient.GetResponseAsync( + synthesizerMessages, + new ChatOptions { Tools = [] }, + ct); + var text = response.Text?.Trim(); + if (string.IsNullOrWhiteSpace(text)) + return; + + var (summary, content) = SplitSummaryAndContent(text); + var skill = new Skill( + Name: name, + Summary: summary, + Content: content, + CreatedAt: existing?.CreatedAt ?? DateTimeOffset.UtcNow, + UpdatedAt: existing is null ? 
null : DateTimeOffset.UtcNow, + LastUsedAt: DateTimeOffset.UtcNow, + SeeAlso: [$"sites/{primaryHost}/login"]); + await skillStore.SaveAsync(skill); + logger.LogInformation("Wrote learned skill '{Name}' ({Nav} navigations, {Steps} steps).", + name, state.Navigations.Count, state.Steps); + } + catch (OperationCanceledException) when (ct.IsCancellationRequested) + { + // Budget was exhausted during synthesis; the task itself already + // succeeded, so swallow and move on. + } + catch (Exception ex) + { + logger.LogWarning(ex, "Failed to write learned skill '{Name}'.", name); + } + } + + private const string LearnedSkillSystemPrompt = """ + You are writing a reusable skill for a browser-automation agent after a task just succeeded. + + Output format: + - Line 1: a single-sentence summary of 15 words or less. + - Line 2: blank. + - Remainder: markdown explaining the flow so a future planner can repeat it efficiently. Cover: landing URL, key steps, selectors or labels that actually worked, known pitfalls. + + Rules: + - Do NOT include credential values, secrets, or any specific text the user typed into fields. + - Do NOT fabricate details the trace does not show. + - Prefer the phrases a planner would search for later (site name, feature name). + - Keep it under ~400 words. + """; + + private static string BuildLearnedSkillPrompt(BrowserTaskInput input, BrowserTaskState state, Skill? 
existing) + { + var sb = new StringBuilder(); + sb.Append("Intent: ").AppendLine(input.Intent); + sb.Append("Allowed hosts: ").AppendLine(string.Join(", ", input.Allowlist!.Patterns)); + sb.Append("Navigations (in order): ").AppendLine(string.Join(" → ", state.Navigations.Select(u => u.ToString()))); + sb.Append("Tool calls: ").Append(state.Steps).AppendLine("."); + if (!string.IsNullOrWhiteSpace(state.Summary)) + sb.Append("Final summary: ").AppendLine(state.Summary); + if (!string.IsNullOrWhiteSpace(state.Result)) + sb.Append("Final result: ").AppendLine(state.Result); + if (existing is not null) + { + sb.AppendLine(); + sb.AppendLine("A prior version of this skill exists — refine rather than rewrite when the content overlaps:"); + sb.AppendLine(existing.Content); + } return sb.ToString(); } + private static (string Summary, string Content) SplitSummaryAndContent(string text) + { + var lines = text.Split('\n'); + var summary = lines[0].Trim(); + if (summary.Length > 200) + summary = summary[..200]; + var content = lines.Length > 1 + ? string.Join('\n', lines.Skip(1)).TrimStart('\n', '\r', ' ') + : text; + return (summary, string.IsNullOrWhiteSpace(content) ? text : content); + } + + private static string Slugify(string intent) + { + var sb = new StringBuilder(capacity: Math.Min(intent.Length, 64)); + var lastDash = true; + foreach (var ch in intent.ToLowerInvariant()) + { + if (char.IsLetterOrDigit(ch)) + { + sb.Append(ch); + lastDash = false; + } + else if (!lastDash && sb.Length < 64) + { + sb.Append('-'); + lastDash = true; + } + if (sb.Length >= 64) break; + } + var slug = sb.ToString().Trim('-'); + return string.IsNullOrEmpty(slug) ? 
"task" : slug; + } + private static AgentTaskResult BuildResult( AgentTaskRequest request, BrowserTaskInput input, diff --git a/src/Foragent.Capabilities/BrowserTask/BrowserTaskPriming.cs b/src/Foragent.Capabilities/BrowserTask/BrowserTaskPriming.cs new file mode 100644 index 0000000..c47bda6 --- /dev/null +++ b/src/Foragent.Capabilities/BrowserTask/BrowserTaskPriming.cs @@ -0,0 +1,132 @@ +using Microsoft.Extensions.AI; +using Microsoft.Extensions.Logging; +using RockBot.Host; + +namespace Foragent.Capabilities.BrowserTask; + +/// +/// Retrieves learned site knowledge for a browser-task invocation and +/// formats it as a block for the planner's user prompt (spec §5.6). Queries +/// both and in +/// parallel; hybrid BM25 + vector when an +/// is in DI, BM25-only otherwise. +/// +/// Isolated from so tests can inject +/// fake stores without going through the capability's full execute path. +/// +public sealed class BrowserTaskPriming( + ISkillStore skillStore, + ILongTermMemory longTermMemory, + IEmbeddingGenerator>? embeddingGenerator, + ILogger logger) +{ + public const int MaxSkills = 5; + public const int MaxMemories = 5; + + public async Task BuildAsync( + string intent, + HostAllowlist allowlist, + CancellationToken cancellationToken) + { + var primaryHost = allowlist.Patterns + .Select(p => p.TrimStart('*', '.')) + .FirstOrDefault(p => !string.IsNullOrEmpty(p) && p != "*"); + var query = string.IsNullOrEmpty(primaryHost) + ? 
intent + : $"{intent} site:{primaryHost}"; + + var embedding = await TryEmbedAsync(query, cancellationToken); + + var skillsTask = SafeSearchSkillsAsync(query, embedding, cancellationToken); + var memoriesTask = SafeSearchMemoriesAsync(query, primaryHost, embedding, cancellationToken); + await Task.WhenAll(skillsTask, memoriesTask); + + var skills = skillsTask.Result; + var memories = memoriesTask.Result; + + if (skills.Count == 0 && memories.Count == 0) + return null; + + var sb = new System.Text.StringBuilder(); + sb.AppendLine("Known site knowledge (from prior runs and operator primers):"); + foreach (var s in skills) + { + sb.Append("- [skill] ").Append(s.Name).Append(": ").AppendLine(s.Summary); + if (!string.IsNullOrWhiteSpace(s.Content)) + { + sb.AppendLine(Indent(Trim(s.Content, 1500))); + } + } + foreach (var m in memories) + { + sb.Append("- [memory] ").AppendLine(Trim(m.Content, 400)); + } + sb.AppendLine("Treat these as hints, not ground truth — re-snapshot to confirm selectors and URLs."); + return sb.ToString(); + } + + private async Task TryEmbedAsync(string query, CancellationToken ct) + { + if (embeddingGenerator is null) + return null; + try + { + var result = await embeddingGenerator.GenerateAsync([query], cancellationToken: ct); + return result.Count > 0 ? result[0].Vector.ToArray() : null; + } + catch (Exception ex) + { + logger.LogDebug(ex, "Embedding generation failed; falling back to BM25-only retrieval."); + return null; + } + } + + private async Task> SafeSearchSkillsAsync( + string query, float[]? embedding, CancellationToken ct) + { + try + { + return await skillStore.SearchAsync(query, MaxSkills, ct, embedding); + } + catch (Exception ex) + { + logger.LogDebug(ex, "Skill search failed; continuing without skill priming."); + return []; + } + } + + private async Task> SafeSearchMemoriesAsync( + string query, string? primaryHost, float[]? 
embedding, CancellationToken ct) + { + try + { + var criteria = new MemorySearchCriteria( + Query: query, + Category: primaryHost is null ? null : $"sites/{primaryHost}", + Tags: [], + CreatedAfter: null, + CreatedBefore: null, + MaxResults: MaxMemories, + QueryEmbedding: embedding!); + return await longTermMemory.SearchAsync(criteria, ct); + } + catch (Exception ex) + { + logger.LogDebug(ex, "Memory search failed; continuing without memory priming."); + return []; + } + } + + private static string Trim(string text, int max) + { + if (string.IsNullOrEmpty(text) || text.Length <= max) + return text; + return text[..max] + "…"; + } + + private static string Indent(string text) + { + var lines = text.Split('\n'); + return string.Join('\n', lines.Select(l => " " + l)); + } +} diff --git a/src/Foragent.Capabilities/Foragent.Capabilities.csproj b/src/Foragent.Capabilities/Foragent.Capabilities.csproj index 30e7579..9f1e323 100644 --- a/src/Foragent.Capabilities/Foragent.Capabilities.csproj +++ b/src/Foragent.Capabilities/Foragent.Capabilities.csproj @@ -6,6 +6,7 @@ + diff --git a/src/Foragent.Capabilities/ForagentCapabilitiesServiceCollectionExtensions.cs b/src/Foragent.Capabilities/ForagentCapabilitiesServiceCollectionExtensions.cs index e2ea5a1..ff6de1c 100644 --- a/src/Foragent.Capabilities/ForagentCapabilitiesServiceCollectionExtensions.cs +++ b/src/Foragent.Capabilities/ForagentCapabilitiesServiceCollectionExtensions.cs @@ -1,5 +1,4 @@ using Foragent.Capabilities.BrowserTask; -using Foragent.Capabilities.SitePosting; using Microsoft.Extensions.DependencyInjection; using RockBot.A2A; @@ -16,9 +15,8 @@ public static IServiceCollection AddForagentCapabilities(this IServiceCollection { services.AddScoped(); services.AddScoped(); - services.AddScoped(); services.AddScoped(); - services.AddScoped(); + services.AddScoped(); services.AddScoped(); return services; } @@ -36,7 +34,6 @@ public static class ForagentCapabilities [ BrowserTaskCapability.SkillDefinition, 
FetchPageTitleCapability.SkillDefinition, - ExtractStructuredDataCapability.SkillDefinition, - PostToSiteCapability.SkillDefinition + ExtractStructuredDataCapability.SkillDefinition ]; } diff --git a/src/Foragent.Capabilities/PostToSiteCapability.cs b/src/Foragent.Capabilities/PostToSiteCapability.cs deleted file mode 100644 index 6a98c3a..0000000 --- a/src/Foragent.Capabilities/PostToSiteCapability.cs +++ /dev/null @@ -1,155 +0,0 @@ -using System.Text.Json; -using Foragent.Browser; -using Foragent.Capabilities.SitePosting; -using Foragent.Credentials; -using Microsoft.Extensions.Logging; -using RockBot.A2A; - -namespace Foragent.Capabilities; - -/// -/// Authenticates against a configured site and posts content (spec §5.1). -/// Site-specific work lives behind ; this capability -/// handles input parsing, broker lookup, session creation, and error shaping. -/// Credential material never appears in the returned -/// (spec §6.1). -/// -public sealed class PostToSiteCapability( - IBrowserSessionFactory browserFactory, - ICredentialBroker credentialBroker, - IEnumerable posters, - ILogger logger) : ICapability -{ - public static AgentSkill SkillDefinition { get; } = new() - { - Id = "post-to-site", - Name = "Post to Site", - Description = "Authenticate against a configured site (using a credential identifier) and publish a post. " - + "Input: JSON {\"site\":\"bluesky\",\"credentialId\":\"...\",\"content\":\"...\"} " - + "or metadata fields site / credentialId / content." 
- }; - - private readonly IReadOnlyDictionary _postersBySite = - posters.ToDictionary(p => p.Site, StringComparer.OrdinalIgnoreCase); - - public string SkillId => SkillDefinition.Id; - public AgentSkill Skill => SkillDefinition; - - public async Task ExecuteAsync(AgentTaskRequest request, AgentTaskContext context) - { - var ct = context.MessageContext.CancellationToken; - var input = PostToSiteInput.Parse(request); - - if (input.Error is not null) - return CapabilityResult.Error(request, input.Error); - - if (!_postersBySite.TryGetValue(input.Site!, out var poster)) - { - var known = string.Join(", ", _postersBySite.Keys.OrderBy(k => k)); - return CapabilityResult.Error( - request, - $"No poster configured for site '{input.Site}'. Known sites: {known}"); - } - - CredentialReference credential; - try - { - credential = await credentialBroker.ResolveAsync(input.CredentialId!, ct); - } - catch (CredentialNotFoundException ex) - { - logger.LogWarning("Credential '{CredentialId}' not found", ex.CredentialId); - return CapabilityResult.Error(request, $"Credential '{ex.CredentialId}' is not configured."); - } - - try - { - await using var session = await browserFactory.CreateSessionAsync(ct); - await poster.PostAsync(session, credential, input.Content!, ct); - return CapabilityResult.Completed(request, $"Posted to {poster.Site}."); - } - catch (Exception ex) when (ex is not OperationCanceledException) - { - // Never echo exception messages verbatim — site posters should not - // embed credentials in them, but belt-and-suspenders since these go - // back to the caller. Log the full exception for operator debugging. - logger.LogWarning(ex, "Post to {Site} failed for credential {CredentialId}", - poster.Site, credential.Id); - return CapabilityResult.Error(request, $"Post to {poster.Site} failed."); - } - } -} - -/// -/// Parses the post-to-site input shape. Accepts either: -/// -/// A JSON object in the first text part: {"site":"...","credentialId":"...","content":"..."}. 
-/// Individual fields via message or request metadata (rockbot 0.8.5+): -/// site, credentialId, content. Metadata overrides JSON when both are present. -/// -/// No URL-extraction fallback — post-to-site is structured enough that bare -/// text input would be ambiguous. Unparseable input yields . -/// -internal readonly record struct PostToSiteInput( - string? Site, string? CredentialId, string? Content, string? Error) -{ - public static PostToSiteInput Parse(AgentTaskRequest request) - { - string? site = null; - string? credentialId = null; - string? content = null; - - var text = request.Message.Parts - .Where(p => p.Kind == "text") - .Select(p => p.Text) - .FirstOrDefault(t => !string.IsNullOrWhiteSpace(t)) - ?.Trim(); - - if (!string.IsNullOrEmpty(text) && text.StartsWith('{')) - { - try - { - using var doc = JsonDocument.Parse(text); - var root = doc.RootElement; - if (root.TryGetProperty("site", out var s)) site = s.GetString(); - if (root.TryGetProperty("credentialId", out var c)) credentialId = c.GetString(); - if (root.TryGetProperty("content", out var p)) content = p.GetString(); - } - catch (JsonException) - { - return new PostToSiteInput(null, null, null, - "Input must be a JSON object with site, credentialId, and content fields."); - } - } - - site = ReadMetadata(request, "site") ?? site; - credentialId = ReadMetadata(request, "credentialId") ?? credentialId; - content = ReadMetadata(request, "content") ?? content; - - if (string.IsNullOrWhiteSpace(site)) - return new PostToSiteInput(null, null, null, "Missing 'site' (e.g. 'bluesky')."); - if (string.IsNullOrWhiteSpace(credentialId)) - return new PostToSiteInput(null, null, null, "Missing 'credentialId'."); - if (string.IsNullOrWhiteSpace(content)) - return new PostToSiteInput(null, null, null, "Missing 'content'."); - - return new PostToSiteInput(site, credentialId, content, null); - } - - private static string? 
ReadMetadata(AgentTaskRequest request, string key) - { - if (request.Message.Metadata is not null - && request.Message.Metadata.TryGetValue(key, out var msgValue) - && !string.IsNullOrWhiteSpace(msgValue)) - { - return msgValue; - } - if (request.Metadata is not null - && request.Metadata.TryGetValue(key, out var reqValue) - && !string.IsNullOrWhiteSpace(reqValue)) - { - return reqValue; - } - return null; - } -} diff --git a/src/Foragent.Capabilities/SitePosting/BlueskySitePoster.cs b/src/Foragent.Capabilities/SitePosting/BlueskySitePoster.cs deleted file mode 100644 index 5339526..0000000 --- a/src/Foragent.Capabilities/SitePosting/BlueskySitePoster.cs +++ /dev/null @@ -1,110 +0,0 @@ -using Foragent.Browser; -using Foragent.Credentials; -using Microsoft.Extensions.Logging; - -namespace Foragent.Capabilities.SitePosting; - -/// -/// Drives the Bluesky web UI (bsky.app) to post content on behalf of a user -/// authenticated with an app password (spec §6.6 prefers app passwords where -/// available). Uses stable accessibility-role selectors rather than CSS so -/// minor UI tweaks don't break the flow — selectors are still inherently -/// fragile and are flagged in docs/framework-feedback.md. -/// -/// -/// Expects with keys identifier -/// (handle or email) and password (app password). Does not persist -/// storageState yet — every post re-authenticates; spec §6.5's -/// session-as-credential flow is deferred. -/// -public sealed class BlueskySitePoster : ISitePoster -{ - public const string SiteId = "bluesky"; - - private static readonly Uri DefaultLoginUrl = new("https://bsky.app/"); - private static readonly TimeSpan InteractiveTimeout = TimeSpan.FromSeconds(30); - - private readonly ILogger logger; - private readonly Uri loginUrl; - - // DI-friendly: defaults to the real bsky.app. Tests use the Uri overload - // to point at a local Kestrel-hosted fake login + compose UI. 
- public BlueskySitePoster(ILogger logger) - : this(logger, DefaultLoginUrl) { } - - public BlueskySitePoster(ILogger logger, Uri loginUrl) - { - this.logger = logger; - this.loginUrl = loginUrl; - } - - // Accessibility-role + attribute selectors. Playwright's string-selector - // dialect does not accept regex; for flexibility across the real bsky.app - // and the fake test UI we pick stable exact strings and update them here - // when Bluesky's copy changes. Flagged as fragile in docs/framework-feedback.md. - private const string SignInButton = "role=button[name=\"Sign in\"]"; - private const string IdentifierField = "input[placeholder=\"Username or email address\"]"; - private const string PasswordField = "input[placeholder=\"Password\"]"; - private const string SubmitLoginButton = "role=button[name=\"Next\"]"; - private const string ComposeButton = "role=button[name=\"New post\"]"; - private const string ComposeEditor = "[contenteditable=\"true\"]"; - private const string PublishButton = "role=button[name=\"Post\"]"; - private const string HomeFeedHeading = "role=heading[name=\"Home\"]"; - - public string Site => SiteId; - - public async Task PostAsync( - IBrowserSession session, - CredentialReference credential, - string content, - CancellationToken cancellationToken) - { - ArgumentNullException.ThrowIfNull(session); - ArgumentNullException.ThrowIfNull(credential); - if (string.IsNullOrWhiteSpace(content)) - throw new ArgumentException("Post content cannot be empty.", nameof(content)); - - var identifier = credential.RequireText("identifier"); - var password = credential.RequireText("password"); - - logger.LogInformation( - "Posting to Bluesky as '{Identifier}' (credential {CredentialId}, {Length} chars)", - identifier, credential.Id, content.Length); - - await using var page = await session.OpenPageAsync(loginUrl, cancellationToken); - - await SignInAsync(page, identifier, password, cancellationToken); - await ComposeAsync(page, content, cancellationToken); - 
} - - private async Task SignInAsync( - IBrowserPage page, string identifier, string password, CancellationToken ct) - { - await page.WaitForSelectorAsync(SignInButton, InteractiveTimeout, ct); - await page.ClickAsync(SignInButton, ct); - - await page.WaitForSelectorAsync(IdentifierField, InteractiveTimeout, ct); - await page.FillAsync(IdentifierField, identifier, ct); - await page.FillAsync(PasswordField, password, ct); - await page.ClickAsync(SubmitLoginButton, ct); - - await page.WaitForSelectorAsync(HomeFeedHeading, InteractiveTimeout, ct); - logger.LogInformation("Bluesky login succeeded for '{Identifier}'", identifier); - } - - private async Task ComposeAsync(IBrowserPage page, string content, CancellationToken ct) - { - await page.WaitForSelectorAsync(ComposeButton, InteractiveTimeout, ct); - await page.ClickAsync(ComposeButton, ct); - - await page.WaitForSelectorAsync(ComposeEditor, InteractiveTimeout, ct); - await page.FillAsync(ComposeEditor, content, ct); - - await page.ClickAsync(PublishButton, ct); - - // Publish closes the composer and returns to the home feed; wait for - // the composer to disappear as the success signal. - await page.WaitForSelectorAsync(HomeFeedHeading, InteractiveTimeout, ct); - logger.LogInformation("Bluesky post published ({Length} chars)", content.Length); - } -} diff --git a/src/Foragent.Capabilities/SitePosting/ISitePoster.cs b/src/Foragent.Capabilities/SitePosting/ISitePoster.cs deleted file mode 100644 index 5e642a5..0000000 --- a/src/Foragent.Capabilities/SitePosting/ISitePoster.cs +++ /dev/null @@ -1,35 +0,0 @@ -using Foragent.Browser; -using Foragent.Credentials; - -namespace Foragent.Capabilities.SitePosting; - -/// -/// Site-specific driver behind the generic post-to-site capability. One -/// implementation per site family (Bluesky, Mastodon, …). The capability -/// resolves an by matching to the -/// site input field, so site dispatch stays out of the capability. 
-/// -/// -/// Not yet lifted to RockBot.A2A — it's Foragent-local until a second -/// framework consumer has the same shape. Noted in docs/framework-feedback.md. -/// -public interface ISitePoster -{ - /// - /// Case-insensitive site identifier (e.g. bluesky, mastodon). - /// Matches the site input sent by the caller. - /// - string Site { get; } - - /// - /// Authenticates (using ) and posts - /// . Implementations must not log credential - /// values or password form fields. Throws on failure; exception messages - /// must not contain credential material. - /// - Task PostAsync( - IBrowserSession session, - CredentialReference credential, - string content, - CancellationToken cancellationToken); -} diff --git a/tests/Foragent.Agent.Tests/BrowserTask/BrowserTaskCapabilityTests.cs b/tests/Foragent.Agent.Tests/BrowserTask/BrowserTaskCapabilityTests.cs index 902c6da..e93b3b0 100644 --- a/tests/Foragent.Agent.Tests/BrowserTask/BrowserTaskCapabilityTests.cs +++ b/tests/Foragent.Agent.Tests/BrowserTask/BrowserTaskCapabilityTests.cs @@ -4,6 +4,7 @@ using Microsoft.Extensions.AI; using Microsoft.Extensions.Logging.Abstractions; using RockBot.A2A; +using RockBot.Host; using Xunit; namespace Foragent.Agent.Tests.BrowserTask; @@ -142,6 +143,21 @@ public async Task NavigateTool_RejectsOffAllowlistHosts() private static (BrowserTaskCapability Capability, FakeBrowserAgentPage Page, FakeAgentBrowserSessionFactory Factory) Build( params ChatResponse[] script) + { + var (cap, page, factory, _, _, _) = BuildFull(new FakeSkillStore(), new FakeLongTermMemory(), script); + return (cap, page, factory); + } + + internal static ( + BrowserTaskCapability Capability, + FakeBrowserAgentPage Page, + FakeAgentBrowserSessionFactory Factory, + ScriptedChatClient Scripted, + FakeSkillStore Skills, + FakeLongTermMemory Memory) BuildFull( + FakeSkillStore skillStore, + FakeLongTermMemory memory, + params ChatResponse[] script) { var page = new FakeBrowserAgentPage(); var factory = new 
FakeAgentBrowserSessionFactory(page); @@ -152,14 +168,107 @@ private static (BrowserTaskCapability Capability, FakeBrowserAgentPage Page, Fak .Build(); var broker = new StubCredentialBroker(); + var priming = new BrowserTaskPriming( + skillStore, + memory, + embeddingGenerator: null, + NullLogger.Instance); var capability = new BrowserTaskCapability( factory, wrapped, broker, + priming, + skillStore, NullLogger.Instance); - return (capability, page, factory); + return (capability, page, factory, scripted, skillStore, memory); + } + + // ── Step 7: learning-substrate tests ──────────────────────────────────── + + [Fact] + public async Task Priming_InjectsRetrievedSkillIntoUserPrompt() + { + var skills = new FakeSkillStore(); + await skills.SaveAsync(new Skill( + Name: "sites/example.com/login", + Summary: "Use the app password, not the account password.", + Content: "Click 'Sign in', enter handle, then password.", + CreatedAt: DateTimeOffset.UtcNow, + UpdatedAt: null, + LastUsedAt: null, + SeeAlso: [])); + + var (capability, _, _, scripted, _, _) = BuildFull( + skills, + new FakeLongTermMemory(), + ScriptedChatClient.ToolCall("done", new { summary = "primed" }), + ScriptedChatClient.Text("stopping")); + var (ctx, _) = TestContext.Build(); + + await capability.ExecuteAsync( + TestContext.Request("browser-task", + """{"intent":"sign in somewhere","allowedHosts":["example.com"]}"""), + ctx); + + var userMessage = scripted.FirstMessages.Single(m => m.Role == ChatRole.User).Text ?? string.Empty; + Assert.Contains("Known site knowledge", userMessage); + Assert.Contains("sites/example.com/login", userMessage); + Assert.Contains("app password", userMessage); + } + + [Fact] + public async Task LearnedSkill_IsWrittenOnSuccess_WhenMultipleNavigations() + { + var skills = new FakeSkillStore(); + + // Two turns of tool calls drive two navigations, then done() triggers + // the learned-skill synthesis turn. 
That synthesis call is the 4th + // scripted response (summary on line 1, content after a blank line). + var (capability, _, _, _, _, _) = BuildFull( + skills, + new FakeLongTermMemory(), + ScriptedChatClient.ToolCall("navigate", new { url = "https://example.com/page1" }), + ScriptedChatClient.ToolCall("navigate", new { url = "https://example.com/page2" }), + ScriptedChatClient.ToolCall("done", new { summary = "clicked through two pages" }), + ScriptedChatClient.Text("stopping"), + // Synthesis turn — separate GetResponseAsync invocation, no tools. + ScriptedChatClient.Text("Navigate home then click the details link.\n\n## Flow\n1. open /page1\n2. click through to /page2")); + var (ctx, _) = TestContext.Build(); + + var result = await capability.ExecuteAsync( + TestContext.Request("browser-task", + """{"intent":"follow the page chain","allowedHosts":["example.com"]}"""), + ctx); + + Assert.Equal(AgentTaskState.Completed, result.State); + var learned = skills.Saved.Keys.SingleOrDefault(k => k.StartsWith("sites/example.com/learned/")); + Assert.NotNull(learned); + var skill = skills.Saved[learned!]; + Assert.Equal("Navigate home then click the details link.", skill.Summary); + Assert.Contains("Flow", skill.Content); + Assert.Contains("page1", skill.Content); + } + + [Fact] + public async Task LearnedSkill_IsNotWritten_OnSingleNavigationTask() + { + var skills = new FakeSkillStore(); + + var (capability, _, _, _, _, _) = BuildFull( + skills, + new FakeLongTermMemory(), + ScriptedChatClient.ToolCall("done", new { summary = "read it", result = "ok" }), + ScriptedChatClient.Text("stopping")); + var (ctx, _) = TestContext.Build(); + + await capability.ExecuteAsync( + TestContext.Request("browser-task", + """{"intent":"read one page","url":"https://example.com/","allowedHosts":["example.com"]}"""), + ctx); + + Assert.DoesNotContain(skills.Saved.Keys, k => k.StartsWith("sites/example.com/learned/")); } private sealed class StubCredentialBroker : ICredentialBroker diff --git 
a/tests/Foragent.Agent.Tests/BrowserTask/ScriptedChatClient.cs b/tests/Foragent.Agent.Tests/BrowserTask/ScriptedChatClient.cs index 3c3aab2..a687ebe 100644 --- a/tests/Foragent.Agent.Tests/BrowserTask/ScriptedChatClient.cs +++ b/tests/Foragent.Agent.Tests/BrowserTask/ScriptedChatClient.cs @@ -15,6 +15,12 @@ internal sealed class ScriptedChatClient : IChatClient public int Turns { get; private set; } + /// Messages passed on the most recent call. + public IReadOnlyList LastMessages { get; private set; } = []; + + /// Messages passed on the first call — the planner's initial prompt, before any tool results. + public IReadOnlyList FirstMessages { get; private set; } = []; + public ScriptedChatClient(params ChatResponse[] responses) { _responses = new Queue(responses); @@ -38,7 +44,11 @@ public Task GetResponseAsync( ChatOptions? options = null, CancellationToken cancellationToken = default) { + var captured = messages.ToArray(); Turns++; + LastMessages = captured; + if (FirstMessages.Count == 0) + FirstMessages = captured; if (_responses.Count == 0) return Task.FromResult(Text("(script exhausted — stopping)")); return Task.FromResult(_responses.Dequeue()); diff --git a/tests/Foragent.Agent.Tests/BskySeedSkillServiceTests.cs b/tests/Foragent.Agent.Tests/BskySeedSkillServiceTests.cs new file mode 100644 index 0000000..5a8c542 --- /dev/null +++ b/tests/Foragent.Agent.Tests/BskySeedSkillServiceTests.cs @@ -0,0 +1,62 @@ +using Microsoft.Extensions.Logging.Abstractions; +using RockBot.Host; +using Xunit; + +namespace Foragent.Agent.Tests; + +/// +/// Covers the idempotency contract spec'd in : +/// the seed is written once when absent, untouched when present. 
+/// +public class BskySeedSkillServiceTests +{ + [Fact] + public async Task Seed_IsWritten_WhenSkillMissing() + { + var store = new FakeSkillStore(); + var service = new BskySeedSkillService(store, NullLogger.Instance); + + await service.StartAsync(CancellationToken.None); + + Assert.True(store.Saved.ContainsKey("sites/bsky.app/login")); + var skill = store.Saved["sites/bsky.app/login"]; + Assert.Contains("app password", skill.Summary, StringComparison.OrdinalIgnoreCase); + Assert.Contains("sites/bsky.app/compose-post", skill.SeeAlso!); + } + + [Fact] + public async Task Seed_LeavesExistingSkillIntact() + { + var store = new FakeSkillStore(); + var existing = new Skill( + Name: "sites/bsky.app/login", + Summary: "operator-edited summary", + Content: "operator-edited content", + CreatedAt: DateTimeOffset.UtcNow.AddDays(-7), + UpdatedAt: null, + LastUsedAt: null, + SeeAlso: []); + await store.SaveAsync(existing); + + var service = new BskySeedSkillService(store, NullLogger.Instance); + await service.StartAsync(CancellationToken.None); + + var after = store.Saved["sites/bsky.app/login"]; + Assert.Equal("operator-edited summary", after.Summary); + Assert.Equal("operator-edited content", after.Content); + } + + [Fact] + public async Task Seed_IsNoop_OnSecondStart() + { + var store = new FakeSkillStore(); + var service = new BskySeedSkillService(store, NullLogger.Instance); + + await service.StartAsync(CancellationToken.None); + var firstContent = store.Saved["sites/bsky.app/login"].Content; + await service.StartAsync(CancellationToken.None); + var secondContent = store.Saved["sites/bsky.app/login"].Content; + + Assert.Same(firstContent, secondContent); + } +} diff --git a/tests/Foragent.Agent.Tests/PostToSiteCapabilityTests.cs b/tests/Foragent.Agent.Tests/PostToSiteCapabilityTests.cs deleted file mode 100644 index 32b4621..0000000 --- a/tests/Foragent.Agent.Tests/PostToSiteCapabilityTests.cs +++ /dev/null @@ -1,205 +0,0 @@ -using Foragent.Browser; -using 
Foragent.Capabilities; -using Foragent.Capabilities.SitePosting; -using Foragent.Credentials; -using Microsoft.Extensions.Logging.Abstractions; -using RockBot.A2A; -using Xunit; - -namespace Foragent.Agent.Tests; - -public class PostToSiteCapabilityTests -{ - [Fact] - public async Task DispatchesToSitePoster_OnSuccess() - { - var poster = new CapturingPoster("bluesky"); - var capability = Build(poster, broker: SingleCredential("rockbot/social/bluesky-rocky")); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("post-to-site", - """{"site":"bluesky","credentialId":"rockbot/social/bluesky-rocky","content":"hello world"}"""), - context); - - Assert.Equal(AgentTaskState.Completed, result.State); - Assert.Equal("Posted to bluesky.", TestContext.TextOf(result)); - Assert.Equal("hello world", poster.LastContent); - Assert.Equal("rockbot/social/bluesky-rocky", poster.LastCredentialId); - } - - [Fact] - public async Task AcceptsInput_FromMetadata() - { - var poster = new CapturingPoster("bluesky"); - var capability = Build(poster, broker: SingleCredential("cred-id")); - var (context, _) = TestContext.Build(); - var request = TestContext.RequestWithMetadata( - "post-to-site", - messageMetadata: new Dictionary - { - ["site"] = "bluesky", - ["credentialId"] = "cred-id", - ["content"] = "via metadata" - }); - - var result = await capability.ExecuteAsync(request, context); - - Assert.Equal(AgentTaskState.Completed, result.State); - Assert.Equal("via metadata", poster.LastContent); - } - - [Fact] - public async Task ReportsMissingCredential_WithoutCreatingSession() - { - var poster = new CapturingPoster("bluesky"); - var factory = new StubBrowserSessionFactory(); - var capability = new PostToSiteCapability( - factory, - new StubCredentialBroker(), - [poster], - NullLogger.Instance); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("post-to-site", - 
"""{"site":"bluesky","credentialId":"ghost","content":"hi"}"""), - context); - - Assert.Equal(0, factory.SessionsCreated); - Assert.Contains("'ghost'", TestContext.TextOf(result)); - Assert.Contains("not configured", TestContext.TextOf(result)); - } - - [Fact] - public async Task ReportsUnknownSite() - { - var poster = new CapturingPoster("bluesky"); - var capability = Build(poster, broker: SingleCredential("cred-id")); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("post-to-site", - """{"site":"mastodon","credentialId":"cred-id","content":"hi"}"""), - context); - - Assert.Contains("mastodon", TestContext.TextOf(result)); - Assert.Contains("Known sites", TestContext.TextOf(result)); - } - - [Fact] - public async Task ReportsInvalidJson() - { - var poster = new CapturingPoster("bluesky"); - var capability = Build(poster, broker: SingleCredential("cred-id")); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("post-to-site", "{not json"), - context); - - Assert.Contains("JSON", TestContext.TextOf(result)); - } - - [Fact] - public async Task ReportsMissingFields() - { - var poster = new CapturingPoster("bluesky"); - var capability = Build(poster, broker: SingleCredential("cred-id")); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("post-to-site", """{"site":"bluesky"}"""), - context); - - Assert.Contains("credentialId", TestContext.TextOf(result)); - } - - [Fact] - public async Task ScrubsExceptionMessage_OnPosterFailure() - { - // If a poster throws with credential-shaped text in the message, the - // capability must NOT echo it back — the caller sees a generic - // failure message; the full exception is only logged. 
- var poster = new ThrowingPoster("bluesky", "secret-pw-leak"); - var capability = Build(poster, broker: SingleCredential("cred-id")); - var (context, _) = TestContext.Build(); - - var result = await capability.ExecuteAsync( - TestContext.Request("post-to-site", - """{"site":"bluesky","credentialId":"cred-id","content":"hi"}"""), - context); - - var text = TestContext.TextOf(result); - Assert.Equal("Post to bluesky failed.", text); - Assert.DoesNotContain("secret-pw-leak", text); - } - - private static PostToSiteCapability Build( - ISitePoster poster, - ICredentialBroker broker) - { - var factory = new StubBrowserSessionFactory(); - return new PostToSiteCapability( - factory, - broker, - [poster], - NullLogger.Instance); - } - - private static StubCredentialBroker SingleCredential(string id) => - new() - { - Credentials = - { - [id] = CredentialReference.FromText(id, "username-password", - new Dictionary - { - ["identifier"] = "u", - ["password"] = "p" - }) - } - }; - - private sealed class CapturingPoster(string site) : ISitePoster - { - public string Site { get; } = site; - public string? LastContent { get; private set; } - public string? 
LastCredentialId { get; private set; } - - public Task PostAsync( - IBrowserSession session, - CredentialReference credential, - string content, - CancellationToken ct) - { - LastContent = content; - LastCredentialId = credential.Id; - return Task.CompletedTask; - } - } - - private sealed class ThrowingPoster(string site, string sensitiveText) : ISitePoster - { - public string Site { get; } = site; - - public Task PostAsync( - IBrowserSession session, - CredentialReference credential, - string content, - CancellationToken ct) => - throw new InvalidOperationException($"Auth failed — {sensitiveText}"); - } -} - -internal sealed class StubCredentialBroker : ICredentialBroker -{ - public Dictionary Credentials { get; } = new(); - - public Task ResolveAsync(string credentialId, CancellationToken ct = default) - { - if (!Credentials.TryGetValue(credentialId, out var cred)) - throw new CredentialNotFoundException(credentialId); - return Task.FromResult(cred); - } -} diff --git a/tests/Foragent.Agent.Tests/TestDoubles.cs b/tests/Foragent.Agent.Tests/TestDoubles.cs index 237baad..a5fb91b 100644 --- a/tests/Foragent.Agent.Tests/TestDoubles.cs +++ b/tests/Foragent.Agent.Tests/TestDoubles.cs @@ -148,8 +148,8 @@ public Task NavigateAsync(Uri url, CancellationToken ct = default) public Task FillAsync(string selector, string value, CancellationToken ct = default) { - // Record the selector but not the value — tests for post-to-site must - // never accidentally assert on password text. + // Record the selector but not the value — tests must never + // accidentally assert on values that could include sensitive input. Actions.Add($"fill:{selector}"); return Task.CompletedTask; } @@ -174,6 +174,81 @@ public Task WaitForSelectorAsync(string selector, TimeSpan? timeout = null, Canc public ValueTask DisposeAsync() => ValueTask.CompletedTask; } +/// +/// In-memory that ignores the query embedding and +/// returns saved skills in insertion order. 
Sufficient for testing the +/// priming / learned-skill paths without spinning up FileSkillStore. +/// +internal sealed class FakeSkillStore : ISkillStore +{ + public Dictionary Saved { get; } = new(StringComparer.Ordinal); + public List<(string Query, int MaxResults)> Searches { get; } = []; + + public Task SaveAsync(Skill skill) + { + Saved[skill.Name] = skill; + return Task.CompletedTask; + } + + public Task GetAsync(string name) => + Task.FromResult(Saved.TryGetValue(name, out var skill) ? skill : null); + + public Task> ListAsync() => + Task.FromResult>([.. Saved.Values]); + + public Task DeleteAsync(string name) + { + Saved.Remove(name); + return Task.CompletedTask; + } + + public Task> SearchAsync( + string query, int maxResults, CancellationToken cancellationToken = default, float[]? queryEmbedding = null) + { + Searches.Add((query, maxResults)); + return Task.FromResult>([.. Saved.Values.Take(maxResults)]); + } +} + +/// +/// In-memory ; search returns entries whose +/// content mentions the query (case-insensitive). Not intended to match the +/// FileMemoryStore ranking — just enough to drive priming tests. +/// +internal sealed class FakeLongTermMemory : ILongTermMemory +{ + public Dictionary Saved { get; } = new(StringComparer.Ordinal); + + public Task SaveAsync(MemoryEntry entry, CancellationToken cancellationToken) + { + Saved[entry.Id] = entry; + return Task.CompletedTask; + } + + public Task> SearchAsync(MemorySearchCriteria criteria, CancellationToken cancellationToken) + { + IEnumerable matches = Saved.Values; + if (!string.IsNullOrEmpty(criteria.Query)) + matches = matches.Where(m => m.Content.Contains(criteria.Query, StringComparison.OrdinalIgnoreCase)); + return Task.FromResult>([.. matches.Take(criteria.MaxResults)]); + } + + public Task GetAsync(string id, CancellationToken cancellationToken) => + Task.FromResult(Saved.TryGetValue(id, out var entry) ? 
entry : null); + + public Task DeleteAsync(string id, CancellationToken cancellationToken) + { + Saved.Remove(id); + return Task.CompletedTask; + } + + public Task> ListTagsAsync(CancellationToken cancellationToken) => + Task.FromResult>([.. Saved.Values.SelectMany(m => m.Tags).Distinct()]); + + public Task> ListCategoriesAsync(CancellationToken cancellationToken) => + Task.FromResult>([.. Saved.Values.Select(m => m.Category).OfType().Distinct()]); +} + internal sealed class StubChatClient(Func, ChatOptions?, Task> responder) : IChatClient { diff --git a/tests/Foragent.Browser.Tests/BlueskySitePosterIntegrationTests.cs b/tests/Foragent.Browser.Tests/BlueskySitePosterIntegrationTests.cs deleted file mode 100644 index e8a7b4c..0000000 --- a/tests/Foragent.Browser.Tests/BlueskySitePosterIntegrationTests.cs +++ /dev/null @@ -1,205 +0,0 @@ -using Foragent.Capabilities.SitePosting; -using Foragent.Credentials; -using Microsoft.AspNetCore.Builder; -using Microsoft.AspNetCore.Hosting.Server; -using Microsoft.AspNetCore.Hosting.Server.Features; -using Microsoft.AspNetCore.Http; -using Microsoft.Extensions.DependencyInjection; -using Microsoft.Extensions.Logging.Abstractions; -using Xunit; - -namespace Foragent.Browser.Tests; - -/// -/// Drives the real against a Kestrel-hosted -/// fake bsky.app-shaped login + compose UI. Validates the full login → post -/// → confirm flow through a real Chromium. The fake server mirrors the -/// selectors the poster targets (sign-in button, identifier / password -/// placeholders, compose contenteditable, post button, home feed heading). 
-/// -[Collection("Playwright")] -public class BlueskySitePosterIntegrationTests(TestPageServerFixture fixture) -{ - [Fact] - public async Task Posts_AfterLogin_OnHappyPath() - { - await using var fake = await FakeBlueskyServer.StartAsync( - expectedIdentifier: "rocky.bsky.social", - expectedPassword: "app-pass-xyz"); - - var poster = new BlueskySitePoster( - NullLogger.Instance, - new Uri(fake.BaseUrl + "/")); - var credential = CredentialReference.FromText( - "rockbot/social/bluesky-rocky", - "username-password", - new Dictionary - { - ["identifier"] = "rocky.bsky.social", - ["password"] = "app-pass-xyz" - }); - - await using var session = await fixture.Factory.CreateSessionAsync(); - await poster.PostAsync(session, credential, "hello from Foragent integration test", CancellationToken.None); - - Assert.Equal("hello from Foragent integration test", fake.LastPostedContent); - Assert.Equal(1, fake.SuccessfulLogins); - } - - [Fact] - public async Task Throws_WhenCredentialFieldMissing() - { - var poster = new BlueskySitePoster( - NullLogger.Instance, - new Uri("http://127.0.0.1/")); - var credential = CredentialReference.FromText( - "id", "username-password", - new Dictionary { ["identifier"] = "u" }); - - await using var session = await fixture.Factory.CreateSessionAsync(); - - var ex = await Assert.ThrowsAsync(() => - poster.PostAsync(session, credential, "hi", CancellationToken.None)); - - Assert.Contains("password", ex.Message); - } -} - -/// -/// A minimal HTML server that shapes like the Bluesky web UI enough for -/// to drive. Hand-rolled HTML keeps the test -/// deterministic — no JS frameworks, no network, no external state. -/// -internal sealed class FakeBlueskyServer : IAsyncDisposable -{ - private const string SessionCookieName = "fake_bsky_session"; - - private readonly WebApplication _app; - private readonly string _expectedIdentifier; - private readonly string _expectedPassword; - - public string BaseUrl { get; } - public string? 
LastPostedContent { get; private set; } - public int SuccessfulLogins { get; private set; } - - private FakeBlueskyServer(WebApplication app, string baseUrl, string identifier, string password) - { - _app = app; - BaseUrl = baseUrl; - _expectedIdentifier = identifier; - _expectedPassword = password; - } - - public static async Task StartAsync(string expectedIdentifier, string expectedPassword) - { - var builder = WebApplication.CreateEmptyBuilder(new WebApplicationOptions()); - builder.WebHost.UseKestrelCore(); - builder.WebHost.UseUrls("http://127.0.0.1:0"); - builder.Services.AddRoutingCore(); - builder.Logging.ClearProviders(); - - var app = builder.Build(); - app.UseRouting(); - - // Built first so handlers can close over the instance and write state - // directly. Routes are registered below. - FakeBlueskyServer? fake = null; - - app.MapGet("/", () => Results.Content(Landing(), "text/html")); - - app.MapGet("/login", () => Results.Content(LoginForm(), "text/html")); - - app.MapPost("/login", async (HttpContext ctx) => - { - var form = await ctx.Request.ReadFormAsync(); - var id = form["identifier"].ToString(); - var pw = form["password"].ToString(); - if (id != fake!._expectedIdentifier || pw != fake._expectedPassword) - return Results.Content(LoginForm(error: "Invalid credentials"), "text/html"); - - fake.SuccessfulLogins++; - ctx.Response.Cookies.Append(SessionCookieName, "ok"); - return Results.Redirect("/home"); - }); - - app.MapGet("/home", (HttpContext ctx) => - { - if (ctx.Request.Cookies[SessionCookieName] != "ok") - return Results.Redirect("/login"); - return Results.Content(Home(), "text/html"); - }); - - app.MapGet("/compose", (HttpContext ctx) => - { - if (ctx.Request.Cookies[SessionCookieName] != "ok") - return Results.Redirect("/login"); - return Results.Content(Compose(), "text/html"); - }); - - app.MapPost("/compose", async (HttpContext ctx) => - { - if (ctx.Request.Cookies[SessionCookieName] != "ok") - return Results.Redirect("/login"); - var 
form = await ctx.Request.ReadFormAsync(); - fake!.LastPostedContent = form["content"].ToString(); - return Results.Redirect("/home"); - }); - - await app.StartAsync(); - var server = app.Services.GetRequiredService(); - var baseUrl = server.Features.Get()!.Addresses.First().TrimEnd('/'); - - fake = new FakeBlueskyServer(app, baseUrl, expectedIdentifier, expectedPassword); - return fake; - } - - public async ValueTask DisposeAsync() => await _app.DisposeAsync(); - - // ── HTML fragments — minimal but shaped for the poster's selectors ────── - - private static string Landing() => """ - Bluesky - -

Welcome

- Sign in - - """; - - private static string LoginForm(string? error = null) => $$""" - Sign in - -
- - - -
- {{(error is null ? "" : $"
{error}
")}} - - """; - - private static string Home() => """ - Home - Bluesky - -

Home

- New post - - """; - - private static string Compose() => """ - Compose - Bluesky - -
-
- - -
- - - """; -} diff --git a/tests/Foragent.Browser.Tests/BrowserTaskIntegrationTests.cs b/tests/Foragent.Browser.Tests/BrowserTaskIntegrationTests.cs index e2da5b1..82c6bb1 100644 --- a/tests/Foragent.Browser.Tests/BrowserTaskIntegrationTests.cs +++ b/tests/Foragent.Browser.Tests/BrowserTaskIntegrationTests.cs @@ -167,13 +167,49 @@ private BrowserTaskCapability BuildCapability(LlmConfig config) .UseFunctionInvocation() .Build(); + var skillStore = new NoopSkillStore(); + var memory = new NoopLongTermMemory(); + var priming = new BrowserTaskPriming( + skillStore, + memory, + embeddingGenerator: null, + NullLogger.Instance); + return new BrowserTaskCapability( fixture.Factory, chatClient, new NoCredentialsBroker(), + priming, + skillStore, NullLogger.Instance); } + private sealed class NoopSkillStore : ISkillStore + { + public Task SaveAsync(Skill skill) => Task.CompletedTask; + public Task GetAsync(string name) => Task.FromResult(null); + public Task> ListAsync() => + Task.FromResult>([]); + public Task DeleteAsync(string name) => Task.CompletedTask; + public Task> SearchAsync( + string query, int maxResults, CancellationToken cancellationToken = default, float[]? 
queryEmbedding = null) => + Task.FromResult>([]); + } + + private sealed class NoopLongTermMemory : ILongTermMemory + { + public Task SaveAsync(MemoryEntry entry, CancellationToken cancellationToken) => Task.CompletedTask; + public Task> SearchAsync(MemorySearchCriteria criteria, CancellationToken cancellationToken) => + Task.FromResult>([]); + public Task GetAsync(string id, CancellationToken cancellationToken) => + Task.FromResult(null); + public Task DeleteAsync(string id, CancellationToken cancellationToken) => Task.CompletedTask; + public Task> ListTagsAsync(CancellationToken cancellationToken) => + Task.FromResult>([]); + public Task> ListCategoriesAsync(CancellationToken cancellationToken) => + Task.FromResult>([]); + } + private static AgentTaskRequest Request(string json) => new() { TaskId = Guid.NewGuid().ToString(), diff --git a/tests/Foragent.Browser.Tests/Foragent.Browser.Tests.csproj b/tests/Foragent.Browser.Tests/Foragent.Browser.Tests.csproj index d68cd27..82ad798 100644 --- a/tests/Foragent.Browser.Tests/Foragent.Browser.Tests.csproj +++ b/tests/Foragent.Browser.Tests/Foragent.Browser.Tests.csproj @@ -5,6 +5,7 @@ +