diff --git a/docs/cli.md b/docs/cli.md index 21a44c8b..bb94f30f 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -44,14 +44,27 @@ Global flags (apply to all subcommands): `--verbose, -v` (enable info-level logg - `--dry-run` - Print the planned set without calling the ADO API. - `--org / --project / --pat` - ADO context overrides (same semantics as the other lifecycle commands). - `--definition-ids ` - Explicit pipeline definition IDs (comma-separated; skips local-fixture auto-detection). + - `--all-repos` - **Project-scope mode.** Activates Preview-driven discovery and operates on every ado-aw pipeline ADO knows about in the project — direct ado-aw definitions *and* consumer pipelines that include ado-aw templates — regardless of which repo their root YAML lives in. Mutually exclusive with `--definition-ids`. Ignores local lock files for matching (uses ADO Pipeline Preview to find marker steps). + - `--source ` - **Filter by template.** Restricts to definitions whose `# ado-aw-metadata` marker references the given source path (e.g. `agents/security-scan.md`). Activates the discovery code path; pairs with `--all-repos` to scope across the whole project. Mutually exclusive with `--definition-ids`. - `secrets list [PATH]` - List variable names and their `isSecret` / `allowOverride` flags on every matched definition. **Never prints values.** - `--json` - Emit machine-readable JSON. - `--org / --project / --pat / --definition-ids` - As above. + - `--all-repos / --source ` - As for `secrets set` (project-scope discovery). - `secrets delete [PATH]` - Delete the named variable from every matched definition. No-op when the variable is absent. - `--dry-run` - Print the planned deletion plan without calling the ADO API. - `--org / --project / --pat / --definition-ids` - As above. + - `--all-repos / --source ` - As for `secrets set` (project-scope discovery). + +### Project-scope discovery (`--all-repos` / `--source`) + +`secrets set / list / delete` accept two opt-in flags that activate **Preview-driven discovery** instead of the default lexical local-fixture matching. They are the surface that solves token management for templates that get included by other pipelines. + +- **`--all-repos`** — search every definition in the ADO project. With it, you can `secrets set GITHUB_TOKEN --all-repos` from anywhere; no local checkout of the consumer pipelines is needed. +- **`--source `** — filter results to definitions whose `# ado-aw-metadata` marker references that template. Useful for fan-out: `secrets set GITHUB_TOKEN --source agents/security-scan.md` rotates the token on every consumer pipeline that includes that template. + +Both flags route through `ado-aw`'s `discover_ado_aw_pipelines` machinery, which calls ADO's Pipeline Preview API per definition and scans the expanded YAML for an `ado-aw-marker` step that every compiled pipeline now carries. `--definition-ids` remains the explicit-ID escape hatch and is mutually exclusive with these flags. `enable`, `disable`, and `remove` are **not** changed — they retain their source-scoped safety semantics. - `enable [PATH]` - Register an ADO build definition for each compiled pipeline discovered under `PATH` (or the current directory) and ensure it is `enabled`. For each fixture, matches against the existing ADO definitions by `yamlFilename` first, then by sanitized display name; creates a new definition when neither matches, flips `queueStatus` to `enabled` when an existing definition is `disabled` / `paused`, and skips when it is already `enabled`. Fail-soft per fixture; exits non-zero if any fixture failed. diff --git a/src/ado/discovery.rs b/src/ado/discovery.rs new file mode 100644 index 00000000..0b7d78bf --- /dev/null +++ b/src/ado/discovery.rs @@ -0,0 +1,1066 @@ +//! Preview-driven discovery of ado-aw pipelines. +//! +//! Replaces the lexical `match_definitions_in` approach (which only +//! finds definitions whose root YAML is itself an ado-aw lock file) +//! with a content-marker scan over expanded YAML returned by ADO's +//! Pipeline Preview API. Picks up consumer pipelines (definitions whose +//! root YAML is hand-written but `include`s an ado-aw template) and is +//! rename-resilient because the marker is in the YAML content rather +//! than the definition's `process.yamlFilename` field. +//! +//! ## Algorithm +//! +//! 1. List every definition in the project via `list_definitions`. +//! 2. Filter by [`DiscoveryScope`] — e.g., `CurrentRepo` matches +//! `repository.url` against the resolved git remote. +//! 3. Fast path: when a definition's `process.yamlFilename` matches one +//! of the supplied local lock files, parse the local file's +//! `# @ado-aw` header and mark the definition `Direct` without +//! spending a Preview call. +//! 4. Otherwise: POST to `/_apis/pipelines/{id}/preview` and scan the +//! response's `finalYaml` for `# ado-aw-metadata: {…}` markers +//! (see [`crate::detect::parse_marker_step`]). +//! 5. Classify per [`DiscoveryStatus`]. +//! +//! ## Empirical grounding +//! +//! The Preview API was validated against the live `msazuresphere/4x4` +//! project (definition 2434, `OS Release Readiness`) — a 56 KB +//! `finalYaml` was returned, comments inside step bodies preserved, +//! top-of-document comments stripped. The marker-step design in +//! `src/compile/extensions/ado_aw_marker.rs` solves the stripping +//! problem by embedding the marker inside a bash heredoc. + +use anyhow::{Context, Result}; +use log::{debug, warn}; +use serde::Deserialize; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; +use std::sync::Arc; +use tokio::sync::Semaphore; + +use super::{AdoAuth, AdoContext, DefinitionSummary, MatchMethod, MatchedDefinition, list_definitions}; +use crate::detect::{MarkerMetadata, parse_marker_step}; + +/// Default permits used to throttle concurrent Preview HTTP calls. +/// Tunable via the `ADO_AW_PREVIEW_CONCURRENCY` environment variable so +/// users hitting ADO rate limits in large projects can dial it down +/// without a code change. +const DEFAULT_PREVIEW_CONCURRENCY: usize = 8; + +// ─── Public types ──────────────────────────────────────────────────── + +/// Which ADO definitions to consider during discovery. +#[derive(Debug, Clone)] +pub enum DiscoveryScope { + /// Only definitions whose backing repository URL matches the + /// resolved git remote of the current working directory. Default + /// for project-scope CLI commands. + CurrentRepo, + /// Every definition in the project, regardless of repository. + AllRepos, + /// A pre-resolved list of definition IDs; bypasses listing and + /// scope filtering entirely. + /// + /// **Reserved for future use.** No production callsite constructs + /// this variant today — the `--definition-ids` CLI flag is handled + /// by `crate::ado::resolve_definitions` (the legacy lexical + /// matcher), which short-circuits before discovery is invoked. The + /// variant exists so callers that want to feed an explicit ID list + /// into the discovery pipeline (e.g. for future automation that + /// has pre-filtered definitions) don't need a parallel API. + Explicit(Vec), +} + +/// Classification of a single ADO definition with respect to ado-aw. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum DiscoveryStatus { + /// The definition's root YAML is itself an ado-aw lock file — + /// either via the local-fixture fast-path, or because the expanded + /// YAML's first marker step appears at the root level. + Direct, + /// The definition's root YAML is hand-written but includes one or + /// more ado-aw templates (detected by markers inside the expanded + /// YAML). + Consumer, + /// Preview returned 400, typically because the consumer's + /// `parameters:` block has required fields with no defaults. The + /// definition may still be an ado-aw consumer; we just couldn't + /// confirm without supplying parameter values. + UnknownRequiredParams, + /// Preview returned 403 — the calling identity lacks read access + /// on the definition or one of its referenced repos. + UnknownForbidden, + /// Preview returned 404 — the definition disappeared between + /// `list_definitions` and `preview_pipeline` (race with a + /// concurrent delete). Tracked as a distinct status so it can be + /// filtered out of the skip-warning counts: there's no operator + /// action to take for a definition that no longer exists. + NotFound, + /// Preview returned some other error (5xx, network failure, etc.). + PreviewFailed(String), + /// Preview succeeded but no ado-aw marker was found in the + /// expanded YAML; the definition is not an ado-aw pipeline. + NotAdoAw, +} + +/// Result of discovery for a single ADO definition. +#[derive(Debug, Clone)] +pub struct DiscoveredPipeline { + pub definition_id: u64, + pub definition_name: String, + pub repository_url: Option, + pub queue_status: Option, + pub markers: Vec, + pub status: DiscoveryStatus, +} + +// ─── Preview API client ────────────────────────────────────────────── + +/// Typed error from a `preview_pipeline` call so callers can map ADO +/// failure modes to [`DiscoveryStatus`] without re-inspecting HTTP +/// response bodies. +#[derive(Debug, Clone)] +pub enum PreviewError { + /// 400 from ADO — usually means the pipeline declares required + /// `parameters:` without defaults. + RequiredParams, + /// 403 — calling identity lacks read access. + Forbidden, + /// 404 — definition does not exist (or is hidden from the caller). + NotFound, + /// Any other failure (5xx, network, malformed JSON). + Other(String), +} + +impl std::fmt::Display for PreviewError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + PreviewError::RequiredParams => write!(f, "preview returned 400 (required parameters without defaults)"), + PreviewError::Forbidden => write!(f, "preview returned 403 (forbidden)"), + PreviewError::NotFound => write!(f, "preview returned 404 (not found)"), + PreviewError::Other(msg) => write!(f, "preview failed: {msg}"), + } + } +} + +/// JSON shape of the Pipeline Preview response. Only `finalYaml` is +/// consumed; other fields (`yaml`, `id`, etc.) are intentionally +/// ignored — Preview also returns the un-rendered yaml under `yaml` +/// which is the wrong surface for marker discovery. +#[derive(Debug, Deserialize)] +struct PreviewResponse { + #[serde(rename = "finalYaml", default)] + final_yaml: Option, +} + +/// Call ADO's Pipeline Preview API and return the expanded `finalYaml`. +/// +/// Uses the `/_apis/pipelines/{id}/preview?api-version=7.1-preview.1` +/// endpoint (not the legacy build-definitions surface — Preview is the +/// only documented public way to get expanded YAML). +pub async fn preview_pipeline( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + definition_id: u64, +) -> std::result::Result { + let url = format!( + "{}/{}/_apis/pipelines/{}/preview?api-version=7.1-preview.1", + ctx.org_url.trim_end_matches('/'), + percent_encoding::utf8_percent_encode(&ctx.project, super::PATH_SEGMENT), + definition_id + ); + + let body = serde_json::json!({ + "previewRun": true, + "templateParameters": {} + }); + + debug!("POST {} (preview definition {})", url, definition_id); + + let resp = auth + .apply(client.post(&url).json(&body)) + .send() + .await + .map_err(|e| PreviewError::Other(format!("network error: {e}")))?; + + let status = resp.status(); + if status.as_u16() == 400 { + return Err(PreviewError::RequiredParams); + } + if status.as_u16() == 403 || status.as_u16() == 401 { + return Err(PreviewError::Forbidden); + } + if status.as_u16() == 404 { + return Err(PreviewError::NotFound); + } + if !status.is_success() { + let body = resp.text().await.unwrap_or_default(); + return Err(PreviewError::Other(format!( + "HTTP {} from preview endpoint: {}", + status, + body.chars().take(500).collect::() + ))); + } + + let parsed: PreviewResponse = resp + .json() + .await + .map_err(|e| PreviewError::Other(format!("malformed preview JSON: {e}")))?; + + parsed + .final_yaml + .ok_or_else(|| PreviewError::Other("preview response missing finalYaml field".to_string())) +} + +// ─── Discovery driver ──────────────────────────────────────────────── + +/// Discover every ado-aw pipeline in scope. +/// +/// `local_lock_paths` enables the fast-path: definitions whose +/// `process.yamlFilename` matches one of these paths skip the Preview +/// call and are classified `Direct` by reading the local file's +/// `# @ado-aw` header (cheap, no HTTP). When `None` or empty, every +/// definition in scope is Previewed. +pub async fn discover_ado_aw_pipelines( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + scope: DiscoveryScope, + local_lock_paths: Option<&[PathBuf]>, +) -> Result> { + let definitions = list_definitions(client, ctx, auth) + .await + .context("Failed to list ADO definitions for discovery")?; + + let filtered = apply_scope_filter(definitions, &scope, &ctx.repo_url()); + + // Build a (normalized yamlFilename → local lock path) map for the + // fast-path. Path comparison uses the same normalization as + // `match_definitions_in`. + let lock_map = build_lock_path_map(local_lock_paths); + + // Resolve concurrency from env; warn (not silently clamp) when an + // operator sets `=0`, since the deadlock-avoidance `.max(1)` would + // mask the typo and leave the user wondering why throughput hasn't + // changed. + let raw_permits = std::env::var("ADO_AW_PREVIEW_CONCURRENCY") + .ok() + .and_then(|v| v.parse::().ok()); + let permits = match raw_permits { + Some(0) => { + warn!( + "ADO_AW_PREVIEW_CONCURRENCY=0 would deadlock the Preview semaphore; \ + clamping to 1. Set a positive integer to control concurrency.", + ); + 1 + } + Some(n) => n, + None => DEFAULT_PREVIEW_CONCURRENCY, + }; + let semaphore = Arc::new(Semaphore::new(permits.max(1))); + + let mut handles = Vec::with_capacity(filtered.len()); + for def in filtered { + let local_match = def + .process + .as_ref() + .and_then(|p| p.yaml_filename.as_ref()) + .and_then(|f| lock_map.get(&super::normalize_ado_yaml_path(f)).cloned()); + + let sem = Arc::clone(&semaphore); + let client = client.clone(); + let ctx = ctx.clone(); + let auth = auth.clone(); + + handles.push(tokio::spawn(async move { + let _permit = sem.acquire_owned().await.expect("semaphore not closed"); + classify_definition(&client, &ctx, &auth, def, local_match).await + })); + } + + let mut results = Vec::with_capacity(handles.len()); + for handle in handles { + match handle.await { + Ok(p) => results.push(p), + Err(e) => warn!("Discovery worker panicked: {e}"), + } + } + + Ok(results) +} + +/// Pure scope filter, factored out so it's exercised by unit tests. +fn apply_scope_filter( + definitions: Vec, + scope: &DiscoveryScope, + current_repo_url: &Option, +) -> Vec { + match scope { + DiscoveryScope::AllRepos => definitions, + DiscoveryScope::Explicit(ids) => definitions + .into_iter() + .filter(|d| ids.contains(&d.id)) + .collect(), + DiscoveryScope::CurrentRepo => { + let Some(current) = current_repo_url else { + // No git remote → nothing matches CurrentRepo. Caller + // can fall back to AllRepos or an explicit ID list. + return Vec::new(); + }; + let target = normalize_repo_url(current); + definitions + .into_iter() + .filter(|d| { + d.repository + .as_ref() + .and_then(|r| r.url.as_ref()) + .map(|u| normalize_repo_url(u) == target) + .unwrap_or(false) + }) + .collect() + } + } +} + +/// Normalize a repo URL for equality comparison. +/// +/// Two normalizations are applied so the comparison is robust to the +/// shape ADO returns: +/// +/// 1. **Percent-decode** the URL so a project named e.g. `My Project` +/// compares equal whether ADO returned `My%20Project` or the (rare +/// but legal) decoded `My Project`. Lossy decoding on invalid UTF-8 +/// keeps us forward-compatible — anything ADO can return, we can +/// compare. +/// 2. **ASCII-lowercase** because ADO is case-insensitive on org / +/// project / repo identifiers, and trim any trailing `/`. +/// +/// Without (1), the comparison would silently fail for any project +/// name containing a percent-reserved character if `ado_ctx.repo_url()` +/// emitted the encoded form and `repository.url` returned the decoded +/// form (or vice-versa). +fn normalize_repo_url(url: &str) -> String { + let decoded = percent_encoding::percent_decode_str(url) + .decode_utf8_lossy() + .into_owned(); + decoded.trim_end_matches('/').to_ascii_lowercase() +} + +/// Build a `(normalized yamlFilename → local lock path)` lookup table +/// from `--source agents/foo.lock.yml` or similar inputs. +fn build_lock_path_map(local_lock_paths: Option<&[PathBuf]>) -> HashMap { + let mut map = HashMap::new(); + let Some(paths) = local_lock_paths else { + return map; + }; + for path in paths { + let normalized = path.to_string_lossy().replace('\\', "/"); + // Match the same trim that `normalize_ado_yaml_path` applies. + let trimmed = normalized.trim_start_matches('/').to_string(); + map.insert(trimmed, path.clone()); + } + map +} + +async fn classify_definition( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + def: DefinitionSummary, + local_match: Option, +) -> DiscoveredPipeline { + let repository_url = def.repository.as_ref().and_then(|r| r.url.clone()); + + // Fast path: if process.yamlFilename matched a local lock file, + // parse it directly. Avoids one HTTP round-trip per ado-aw-owned + // definition. The local file existing but not parsing falls + // through to Preview as a defensive measure. + if let Some(local_path) = local_match + && let Some(meta) = parse_local_lock(&local_path).await + { + return DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![meta], + status: DiscoveryStatus::Direct, + }; + } + + match preview_pipeline(client, ctx, auth, def.id).await { + Ok(final_yaml) => { + let markers = parse_marker_step(&final_yaml); + let status = if markers.is_empty() { + DiscoveryStatus::NotAdoAw + } else if is_direct_match(&def, &markers) { + DiscoveryStatus::Direct + } else { + DiscoveryStatus::Consumer + }; + DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers, + status, + } + } + Err(PreviewError::RequiredParams) => DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![], + status: DiscoveryStatus::UnknownRequiredParams, + }, + Err(PreviewError::Forbidden) => DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![], + status: DiscoveryStatus::UnknownForbidden, + }, + Err(PreviewError::NotFound) => { + // Definition was deleted between `list_definitions` and the + // Preview call (TOCTOU race with a concurrent delete). + // Track as a distinct status so it's excluded from the + // "Preview Failed" warning — there's no operator action to + // take for a definition that no longer exists. + debug!( + "Definition {} ({}) disappeared between list and preview (404); skipping", + def.id, def.name + ); + DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![], + status: DiscoveryStatus::NotFound, + } + } + Err(e) => DiscoveredPipeline { + definition_id: def.id, + definition_name: def.name, + repository_url, + queue_status: def.queue_status, + markers: vec![], + status: DiscoveryStatus::PreviewFailed(e.to_string()), + }, + } +} + +/// Heuristic: classify as `Direct` if the definition's root YAML is +/// itself an ado-aw lock file, otherwise `Consumer`. The check uses +/// the definition's `process.yamlFilename` as a proxy — if the source +/// markdown referenced by the marker has the same stem as the root +/// YAML, the definition is the direct owner. Anything else is a +/// consumer that pulls the template in via `template:` indirection. +/// +/// Returns `false` for the marker-less case — `classify_definition` +/// only routes here when at least one marker was found, but defensive +/// against future callers. +fn is_direct_match(def: &DefinitionSummary, markers: &[MarkerMetadata]) -> bool { + if markers.is_empty() { + // 0 markers means "not ado-aw at all", which is neither direct + // nor consumer. Belt-and-braces — `classify_definition` + // currently guards this, but the guard could move. + return false; + } + if markers.len() > 1 { + // Multiple markers means a consumer pulling in more than one + // template; can't be a direct ado-aw pipeline. + return false; + } + let marker = &markers[0]; + let Some(yaml_filename) = def + .process + .as_ref() + .and_then(|p| p.yaml_filename.as_ref()) + else { + return false; + }; + let yaml_normalized = super::normalize_ado_yaml_path(yaml_filename); + + // Map e.g. `agents/foo.md` → `agents/foo.lock.yml` and compare to + // the definition's root YAML. Convention: `.md` compiles to + // `.lock.yml`. + let Some(stem) = marker + .source + .strip_suffix(".md") + .map(|s| format!("{s}.lock.yml")) + else { + return false; + }; + yaml_normalized == stem || yaml_normalized.ends_with(&format!("/{stem}")) +} + +async fn parse_local_lock(path: &Path) -> Option { + let content = tokio::fs::read_to_string(path).await.ok()?; + // Two surfaces, in order of preference: + // (1) the structural marker step — survives Preview expansion and + // is identical to what the Preview path would parse; + // (2) the legacy top-of-file `# @ado-aw` header — kept for + // backward compat with pre-marker-extension lock files. + if let Some(meta) = parse_marker_step(&content).into_iter().next() { + return Some(meta); + } + // Fall back to the legacy header. + for line in content.lines().take(5) { + if let Some(h) = crate::detect::parse_header_line(line) { + return Some(MarkerMetadata { + schema: 0, + source: h.source, + version: h.version, + target: String::new(), + }); + } + } + None +} + +// ─── Adapters into the existing CLI types ──────────────────────────── + +/// Convert a [`DiscoveredPipeline`] into a [`MatchedDefinition`], the +/// shape used by every CLI command (`list`, `secrets set/list/delete`, +/// etc.). This keeps the rest of the codebase unchanged when commands +/// opt into discovery via `--all-repos` / `--source`. +/// +/// Returns `None` for any classification that isn't safely actionable +/// by a write command. In particular `UnknownRequiredParams`, +/// `UnknownForbidden`, and `PreviewFailed` are dropped because we +/// have no markers to attach (so we can't even tell the user which +/// template a write would affect); `NotAdoAw` is dropped because it +/// isn't ado-aw at all. Callers that want a richer summary (e.g. a +/// future `list --all-repos`) should inspect `DiscoveredPipeline` +/// directly rather than going through this adapter. +pub fn discovered_to_matched(d: &DiscoveredPipeline) -> Option { + match d.status { + DiscoveryStatus::Direct | DiscoveryStatus::Consumer => {} + DiscoveryStatus::NotAdoAw + | DiscoveryStatus::NotFound + | DiscoveryStatus::UnknownForbidden + | DiscoveryStatus::UnknownRequiredParams + | DiscoveryStatus::PreviewFailed(_) => return None, + } + + // Join every marker's source path so consumers that include + // multiple templates show up honestly in the CLI summary instead + // of silently truncating to whichever marker happened to be + // first. Also apply `sanitize_for_vso_logging` here: the + // `yaml_path` ends up in `print_matched_summary` (which writes to + // stdout), and if `ado-aw secrets set --all-repos` is ever invoked + // from inside an ADO pipeline step, an attacker-controlled marker + // source path containing `##vso[` would otherwise be processed by + // the agent's logging-command scanner. + let yaml_path = if d.markers.is_empty() { + String::new() + } else { + let joined = d + .markers + .iter() + .map(|m| m.source.as_str()) + .collect::>() + .join(", "); + sanitize_for_vso_logging(&joined) + }; + + Some(MatchedDefinition { + id: d.definition_id, + name: d.definition_name.clone(), + match_method: MatchMethod::Discovery, + yaml_path, + queue_status: d.queue_status.clone(), + }) +} + +/// Neutralise ADO build-agent logging-command prefixes (`##vso[`, +/// `##[`). Mirrors `crate::compile::extensions::ado_aw_marker` and +/// `crate::agent_stats::sanitize_for_markdown` so that data flowing +/// from a Preview-discovered marker through the CLI's own stdout +/// cannot smuggle a `task.setvariable` (or similar) when the CLI is +/// invoked from inside an ADO pipeline step. +fn sanitize_for_vso_logging(s: &str) -> String { + s.replace("##vso[", "[vso-filtered][") + .replace("##[", "[filtered][") +} + +/// CLI-facing wrapper: run Preview-driven discovery with the given +/// scope, optionally filter to consumers of a single template source, +/// and return the result as `Vec`. +/// +/// `source_filter` filters discovery results so only definitions whose +/// markers reference that source path are kept. Match is by exact +/// equality on the normalized source string in the marker JSON. +/// +/// Skip-summary warnings are emitted differently depending on whether +/// `source_filter` is active: +/// +/// - **Unfiltered (`--all-repos` alone)**: every `UnknownRequiredParams` +/// / `UnknownForbidden` / `PreviewFailed` definition is counted — +/// under `--all-repos` the user is operating on every ado-aw pipeline +/// in scope, so each failure represents a real skip. +/// +/// - **Filtered (`--source `)**: we can't tell whether a failed +/// definition would have been a consumer of `path` because we never +/// got markers out of it. Emitting per-status counts would mislead +/// the user into thinking they're missing consumers of their +/// template. Instead, emit a single conservative warning ("N +/// definitions could not be inspected; consumers of `` among +/// them may have been silently skipped") so the operator is informed +/// without being told false specifics. +pub async fn resolve_definitions_via_discovery( + client: &reqwest::Client, + ctx: &AdoContext, + auth: &AdoAuth, + scope: DiscoveryScope, + local_lock_paths: Option<&[PathBuf]>, + source_filter: Option<&str>, +) -> Result> { + let discovered = discover_ado_aw_pipelines(client, ctx, auth, scope, local_lock_paths).await?; + + let mut skipped_required_params = 0usize; + let mut skipped_forbidden = 0usize; + let mut skipped_failed = 0usize; + let mut uninspectable = 0usize; + + // Normalize the user-supplied `--source` value through the same + // canonical form the compiler uses for the marker JSON's `source` + // field. Without this, `--source ./agents/foo.md` or + // `--source agents\foo.md` (Windows) silently matches nothing + // because the marker stores `agents/foo.md`. + let normalized_filter: Option = source_filter + .map(|s| crate::compile::normalize_source_path(Path::new(s))); + + let kept: Vec<_> = discovered + .into_iter() + .filter(|d| { + let matches_filter = match normalized_filter.as_deref() { + Some(src) => d.markers.iter().any(|m| m.source == src), + None => true, + }; + + // Count toward skip totals only when the failure is + // relevant to the requested operation: + // - unfiltered: every failure is relevant (we're operating + // on every ado-aw pipeline in scope); + // - filtered: we can't attribute uninspectable definitions + // to a specific source, so use a single combined counter. + if normalized_filter.is_none() { + match &d.status { + DiscoveryStatus::UnknownRequiredParams => skipped_required_params += 1, + DiscoveryStatus::UnknownForbidden => skipped_forbidden += 1, + DiscoveryStatus::PreviewFailed(_) => skipped_failed += 1, + _ => {} + } + } else if matches!( + d.status, + DiscoveryStatus::UnknownRequiredParams + | DiscoveryStatus::UnknownForbidden + | DiscoveryStatus::PreviewFailed(_) + ) { + uninspectable += 1; + } + + matches_filter + }) + .collect(); + + if skipped_required_params > 0 { + warn!( + "Discovery skipped {skipped_required_params} definition(s) whose Pipeline Preview \ + requires templateParameters with no defaults. Use --definition-ids to act on them \ + directly.", + ); + } + if skipped_forbidden > 0 { + warn!( + "Discovery skipped {skipped_forbidden} definition(s) the calling identity lacks \ + read access to. Check your PAT or AAD permissions.", + ); + } + if skipped_failed > 0 { + warn!( + "Discovery skipped {skipped_failed} definition(s) whose Pipeline Preview returned \ + an unexpected error. Re-run with --debug to see details.", + ); + } + if uninspectable > 0 + && let Some(src) = normalized_filter.as_deref() + { + warn!( + "Discovery could not inspect {uninspectable} definition(s) (Preview failure, \ + forbidden, or required-parameters); any consumers of `{src}` among them have \ + been silently skipped. Re-run with --debug for per-definition reasons.", + ); + } + + Ok(kept.iter().filter_map(discovered_to_matched).collect()) +} + +// AdoContext helper: derive the resolved git remote URL for +// `CurrentRepo` scoping. Lives here (rather than on `AdoContext`) +// because the context only stores org+project+repo_name today; +// reconstructing the URL is a local detail of discovery. +// +// Percent-encodes `project` and `repo_name` to match the form ADO +// returns in `repository.url` — without this, projects whose names +// contain spaces or other reserved chars would silently match nothing +// because the lowercase comparison can't reconcile e.g. `my project` +// with `my%20project`. +impl AdoContext { + fn repo_url(&self) -> Option { + if self.repo_name.is_empty() { + return None; + } + Some(format!( + "{}/{}/_git/{}", + self.org_url.trim_end_matches('/'), + percent_encoding::utf8_percent_encode(&self.project, super::PATH_SEGMENT), + percent_encoding::utf8_percent_encode(&self.repo_name, super::PATH_SEGMENT), + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ado::{ProcessInfo, Repository}; + + fn def_with( + id: u64, + name: &str, + yaml_filename: Option<&str>, + repo_url: Option<&str>, + ) -> DefinitionSummary { + DefinitionSummary { + id, + name: name.to_string(), + process: yaml_filename.map(|f| ProcessInfo { + yaml_filename: Some(f.to_string()), + }), + queue_status: None, + path: None, + repository: repo_url.map(|u| Repository { + url: Some(u.to_string()), + name: None, + repo_type: None, + id: None, + }), + revision: None, + } + } + + // ── apply_scope_filter ─────────────────────────────────────────── + + #[test] + fn scope_all_repos_returns_everything() { + let defs = vec![ + def_with(1, "a", None, Some("https://dev.azure.com/o/p/_git/a")), + def_with(2, "b", None, Some("https://dev.azure.com/o/p/_git/b")), + ]; + let kept = apply_scope_filter(defs, &DiscoveryScope::AllRepos, &None); + assert_eq!(kept.len(), 2); + } + + #[test] + fn scope_explicit_filters_by_id() { + let defs = vec![ + def_with(1, "a", None, None), + def_with(2, "b", None, None), + def_with(3, "c", None, None), + ]; + let kept = apply_scope_filter(defs, &DiscoveryScope::Explicit(vec![1, 3]), &None); + assert_eq!(kept.iter().map(|d| d.id).collect::>(), vec![1, 3]); + } + + #[test] + fn scope_current_repo_matches_normalized_url() { + let defs = vec![ + def_with(1, "a", None, Some("https://dev.azure.com/Org/P/_git/Repo")), + def_with(2, "b", None, Some("https://dev.azure.com/Org/P/_git/Other")), + ]; + let current = Some("https://dev.azure.com/org/p/_git/repo/".to_string()); + let kept = apply_scope_filter(defs, &DiscoveryScope::CurrentRepo, ¤t); + assert_eq!(kept.len(), 1); + assert_eq!(kept[0].id, 1); + } + + #[test] + fn scope_current_repo_with_no_remote_returns_empty() { + let defs = vec![def_with(1, "a", None, Some("https://dev.azure.com/o/p/_git/x"))]; + let kept = apply_scope_filter(defs, &DiscoveryScope::CurrentRepo, &None); + assert!(kept.is_empty()); + } + + #[test] + fn scope_current_repo_excludes_definitions_without_repository() { + let defs = vec![ + def_with(1, "a", None, None), + def_with(2, "b", None, Some("https://dev.azure.com/o/p/_git/x")), + ]; + let current = Some("https://dev.azure.com/o/p/_git/x".to_string()); + let kept = apply_scope_filter(defs, &DiscoveryScope::CurrentRepo, ¤t); + assert_eq!(kept.len(), 1); + assert_eq!(kept[0].id, 2); + } + + // ── is_direct_match ────────────────────────────────────────────── + + #[test] + fn direct_when_yaml_filename_matches_marker_stem() { + let def = def_with(1, "a", Some("/agents/foo.lock.yml"), None); + let markers = vec![MarkerMetadata { + schema: 1, + source: "agents/foo.md".to_string(), + version: "0.30.0".to_string(), + target: "standalone".to_string(), + }]; + assert!(is_direct_match(&def, &markers)); + } + + #[test] + fn direct_when_yaml_filename_has_extra_path_prefix() { + // ADO sometimes stores yamlFilename with a project-relative + // leading slash + extra path components. The marker source is + // just the markdown path the user passed at compile time. + let def = def_with(1, "a", Some("/agents/foo.lock.yml"), None); + let markers = vec![MarkerMetadata { + schema: 1, + source: "agents/foo.md".to_string(), + version: "0.30.0".to_string(), + target: "standalone".to_string(), + }]; + assert!(is_direct_match(&def, &markers)); + } + + #[test] + fn consumer_when_yaml_filename_does_not_match_marker() { + let def = def_with(1, "a", Some("/release-readiness.yml"), None); + let markers = vec![MarkerMetadata { + schema: 1, + source: "agents/foo.md".to_string(), + version: "0.30.0".to_string(), + target: "stage".to_string(), + }]; + assert!(!is_direct_match(&def, &markers)); + } + + #[test] + fn consumer_when_multiple_markers_present() { + let def = def_with(1, "a", Some("/agents/foo.lock.yml"), None); + let markers = vec![ + MarkerMetadata { + schema: 1, + source: "agents/foo.md".to_string(), + version: "0.30.0".to_string(), + target: "stage".to_string(), + }, + MarkerMetadata { + schema: 1, + source: "agents/bar.md".to_string(), + version: "0.30.0".to_string(), + target: "job".to_string(), + }, + ]; + // Multiple markers = at least one template is being included + // alongside something else; not a single direct ownership. + assert!(!is_direct_match(&def, &markers)); + } + + // ── build_lock_path_map ────────────────────────────────────────── + + #[test] + fn lock_map_normalizes_paths() { + let paths = vec![ + PathBuf::from("agents\\foo.lock.yml"), + PathBuf::from("/agents/bar.lock.yml"), + ]; + let map = build_lock_path_map(Some(&paths)); + assert!(map.contains_key("agents/foo.lock.yml")); + assert!(map.contains_key("agents/bar.lock.yml")); + } + + #[test] + fn lock_map_empty_for_none() { + assert!(build_lock_path_map(None).is_empty()); + } + + // ── PreviewError ───────────────────────────────────────────────── + + #[test] + fn preview_error_display_is_actionable() { + assert!( + PreviewError::RequiredParams + .to_string() + .contains("required parameters") + ); + assert!(PreviewError::Forbidden.to_string().contains("403")); + assert!(PreviewError::NotFound.to_string().contains("404")); + } + + // ── source_filter normalization ────────────────────────────────── + + #[test] + fn source_filter_normalization_matches_marker_form() { + // The marker stores normalized form (`agents/foo.md`). Verify + // that the same normalization applied to user input produces + // matchable strings for the common variants. + use crate::compile::normalize_source_path; + use std::path::Path; + + let canonical = normalize_source_path(Path::new("agents/foo.md")); + assert_eq!(canonical, "agents/foo.md"); + + // Leading `./` is stripped. + assert_eq!( + normalize_source_path(Path::new("./agents/foo.md")), + canonical + ); + + // Backslashes are normalized to forward slashes. + assert_eq!( + normalize_source_path(Path::new(r"agents\foo.md")), + canonical + ); + } + + // ── discovered_to_matched ──────────────────────────────────────── + + fn discovered(status: DiscoveryStatus) -> DiscoveredPipeline { + DiscoveredPipeline { + definition_id: 42, + definition_name: "test".to_string(), + repository_url: None, + queue_status: None, + markers: vec![], + status, + } + } + + #[test] + fn discovered_to_matched_drops_not_found() { + // 404 from Preview (definition deleted in flight) must not + // surface as a matched definition that a write command would + // act on — there's nothing to act on. + assert!(discovered_to_matched(&discovered(DiscoveryStatus::NotFound)).is_none()); + } + + #[test] + fn discovered_to_matched_drops_unactionable_statuses() { + for status in [ + DiscoveryStatus::NotAdoAw, + DiscoveryStatus::NotFound, + DiscoveryStatus::UnknownForbidden, + DiscoveryStatus::UnknownRequiredParams, + DiscoveryStatus::PreviewFailed("boom".to_string()), + ] { + assert!( + discovered_to_matched(&discovered(status.clone())).is_none(), + "expected {status:?} to map to None" + ); + } + } + + #[test] + fn discovered_to_matched_keeps_direct_and_consumer() { + assert!(discovered_to_matched(&discovered(DiscoveryStatus::Direct)).is_some()); + assert!(discovered_to_matched(&discovered(DiscoveryStatus::Consumer)).is_some()); + } + + #[test] + fn discovered_to_matched_joins_multiple_marker_sources() { + // A consumer that includes two templates must surface both + // sources in the yaml_path summary, not silently truncate to + // whichever happened to be first. + let mut d = discovered(DiscoveryStatus::Consumer); + d.markers = vec![ + MarkerMetadata { + schema: 1, + source: "agents/a.md".to_string(), + version: "1.0".to_string(), + target: "job".to_string(), + }, + MarkerMetadata { + schema: 1, + source: "agents/b.md".to_string(), + version: "1.0".to_string(), + target: "stage".to_string(), + }, + ]; + let matched = discovered_to_matched(&d).expect("Consumer kept"); + assert!( + matched.yaml_path.contains("agents/a.md") + && matched.yaml_path.contains("agents/b.md"), + "expected both marker sources in yaml_path, got: {}", + matched.yaml_path + ); + } + + #[test] + fn discovered_to_matched_sanitises_vso_in_yaml_path() { + // The yaml_path ends up in stdout via print_matched_summary. + // If the CLI is invoked from inside an ADO pipeline step, an + // attacker-controlled marker source path containing `##vso[` + // would otherwise be processed by the agent's logging-command + // scanner. + let mut d = discovered(DiscoveryStatus::Consumer); + d.markers = vec![MarkerMetadata { + schema: 1, + source: "agents/##vso[task.setvariable variable=X]value.md".to_string(), + version: "1.0".to_string(), + target: "job".to_string(), + }]; + let matched = discovered_to_matched(&d).expect("Consumer kept"); + assert!( + !matched.yaml_path.contains("##vso["), + "raw ##vso[ leaked into yaml_path: {}", + matched.yaml_path, + ); + assert!( + matched.yaml_path.contains("[vso-filtered]["), + "expected `##vso[` neutralised to `[vso-filtered][`: {}", + matched.yaml_path, + ); + } + + // ── normalize_repo_url ─────────────────────────────────────────── + + #[test] + fn normalize_repo_url_is_encoding_independent() { + // ADO usually returns percent-encoded URLs (`My%20Project`), + // but the comparison must work whichever shape both sides + // happen to be in. + let encoded = "https://dev.azure.com/Org/My%20Project/_git/Repo"; + let decoded = "https://dev.azure.com/Org/My Project/_git/Repo"; + assert_eq!(normalize_repo_url(encoded), normalize_repo_url(decoded)); + } + + #[test] + fn normalize_repo_url_is_case_insensitive_and_trims_trailing_slash() { + assert_eq!( + normalize_repo_url("https://dev.azure.com/Org/P/_git/Repo/"), + normalize_repo_url("https://dev.azure.com/org/p/_git/repo") + ); + } + + // ── sanitize_for_vso_logging ───────────────────────────────────── + + #[test] + fn discovery_sanitize_for_vso_logging_neutralises_prefixes() { + assert_eq!( + sanitize_for_vso_logging("##vso[task.setvariable variable=X]value"), + "[vso-filtered][task.setvariable variable=X]value" + ); + assert_eq!( + sanitize_for_vso_logging("##[warning]ignore me"), + "[filtered][warning]ignore me" + ); + assert_eq!(sanitize_for_vso_logging("agents/foo.md"), "agents/foo.md"); + } +} diff --git a/src/ado/mod.rs b/src/ado/mod.rs index 5f3bd6de..1f19586d 100644 --- a/src/ado/mod.rs +++ b/src/ado/mod.rs @@ -16,6 +16,8 @@ use std::path::Path; use crate::detect; +pub mod discovery; + /// ADO resource ID for minting ADO-scoped tokens via Azure CLI. const ADO_RESOURCE_ID: &str = "499b84ac-1321-427f-aa17-267ca6975798"; @@ -209,6 +211,38 @@ pub struct DefinitionSummary { /// `includeAllProperties=true`. May be absent on older API versions. #[serde(default)] pub path: Option, + /// Backing git repository (URL, name, type, id). Populated by ADO's + /// list endpoint without any extra query parameters. Used by + /// project-scope discovery to filter definitions by the current + /// git remote (`DiscoveryScope::CurrentRepo`). + #[serde(default)] + pub repository: Option, + /// Monotonic revision counter ADO bumps on every definition edit. + /// Deserialised here so a future Preview-driven discovery cache + /// can key on `(definition_id, revision)`. **No caching is + /// implemented yet** — see the discovery module for the current + /// in-process behaviour. Track in a follow-up before depending on + /// this for staleness checks. + #[serde(default)] + pub revision: Option, +} + +#[derive(Debug, Deserialize, Clone)] +pub struct Repository { + /// Browse URL of the backing repo (e.g. + /// `https://dev.azure.com/{org}/{project}/_git/{repo}`). Used for + /// `DiscoveryScope::CurrentRepo` filtering. + #[serde(default)] + pub url: Option, + /// Human-readable repo name. + #[serde(default)] + pub name: Option, + /// Repository provider (e.g. `"TfsGit"`, `"GitHub"`). + #[serde(rename = "type", default)] + pub repo_type: Option, + /// Backing repository ID (GUID for TfsGit, owner/repo for GitHub). + #[serde(default)] + pub id: Option, } #[derive(Debug, Deserialize)] @@ -223,6 +257,11 @@ pub enum MatchMethod { YamlPath, PipelineName, Explicit, + /// Found via Preview-driven discovery (Workstream P). Uniquely + /// identifies definitions that ADO knows about but where the + /// caller has no corresponding local lock file — i.e. consumer + /// pipelines and ado-aw definitions in other repos. + Discovery, } impl std::fmt::Display for MatchMethod { @@ -231,6 +270,7 @@ impl std::fmt::Display for MatchMethod { MatchMethod::YamlPath => write!(f, "yaml-path"), MatchMethod::PipelineName => write!(f, "pipeline-name"), MatchMethod::Explicit => write!(f, "explicit"), + MatchMethod::Discovery => write!(f, "discovery"), } } } @@ -1366,6 +1406,8 @@ mod tests { process: None, queue_status: None, path: None, + repository: None, + revision: None, } } @@ -1378,6 +1420,8 @@ mod tests { }), queue_status: None, path: None, + repository: None, + revision: None, } } diff --git a/src/compile/common.rs b/src/compile/common.rs index dcdcc2e8..45008294 100644 --- a/src/compile/common.rs +++ b/src/compile/common.rs @@ -1219,8 +1219,14 @@ pub const HEADER_MARKER: &str = "# @ado-aw"; /// The source path is the input path as provided to the compiler (e.g., `agents/my-agent.md`, /// `.azdo/pipelines/review.md`, or any other location the user chose). Path separators /// are normalized to forward slashes for cross-platform consistency. -pub fn generate_header_comment(input_path: &std::path::Path) -> String { - let version = env!("CARGO_PKG_VERSION"); +/// Normalise a source markdown path for embedding in generated YAML. +/// +/// Applies the same transformations the `# @ado-aw` comment header uses +/// (forward-slash separators, newline/CR stripping, `"` escaping, leading +/// `./` collapsed). Shared between [`generate_header_comment`] and the +/// always-on `ado-aw-marker` compiler extension so both surfaces agree on +/// the canonical form of a source path. +pub fn normalize_source_path(input_path: &std::path::Path) -> String { let mut source_path = input_path .to_string_lossy() .replace('\\', "/") @@ -1233,6 +1239,13 @@ pub fn generate_header_comment(input_path: &std::path::Path) -> String { source_path = source_path[2..].to_string(); } + source_path +} + +pub fn generate_header_comment(input_path: &std::path::Path) -> String { + let version = env!("CARGO_PKG_VERSION"); + let source_path = normalize_source_path(input_path); + format!( "# This file is auto-generated by ado-aw. Do not edit manually.\n\ # @ado-aw source=\"{}\" version={}\n", @@ -3220,8 +3233,7 @@ pub async fn compile_template_target( let extensions = super::extensions::collect_extensions(front_matter); // Build compile context for MCPG config generation - let input_dir = input_path.parent().unwrap_or(Path::new(".")); - let ctx = CompileContext::new(front_matter, input_dir).await?; + let ctx = CompileContext::new(front_matter, input_path).await?; // Generate stage prefix for job-name uniqueness and template parameters let stage_prefix = generate_stage_prefix(&front_matter.name); diff --git a/src/compile/extensions/ado_aw_marker.rs b/src/compile/extensions/ado_aw_marker.rs new file mode 100644 index 00000000..bc5d65f5 --- /dev/null +++ b/src/compile/extensions/ado_aw_marker.rs @@ -0,0 +1,284 @@ +//! Always-on ado-aw marker extension. +//! +//! Injects a single informational step into the Setup job of every +//! compiled pipeline. The step's bash body carries a machine-readable +//! JSON metadata blob keyed by a `# ado-aw-metadata:` prefix, plus a +//! runtime `echo` for build-log visibility. +//! +//! Why a step (and not a top-of-file comment): ADO's Pipeline Preview +//! API strips top-of-document leading comments during YAML expansion +//! (verified empirically against live def 2434 in `msazuresphere/4x4`). +//! Comments embedded inside step bodies are preserved verbatim. The +//! marker has to live inside a step body to survive Preview-driven +//! discovery. +//! +//! Why JSON inside the marker: forward-compatible schema. New fields +//! (e.g., compiler-derived secrets list) can be added without breaking +//! older parsers, mirroring gh-aw's `# gh-aw-metadata: {...}` shape. + +use super::{CompileContext, CompilerExtension, ExtensionPhase}; +use anyhow::Result; + +// ─── ado-aw marker (always-on, internal) ───────────────────────────── + +/// Always-on internal extension that embeds machine-readable +/// `# ado-aw-metadata: {…}` JSON inside an injected Setup-job step. +/// +/// The metadata is the canonical surface consumed by Preview-driven +/// project-scope discovery in [`crate::ado`]. Discovery enumerates ADO +/// definitions, expands each via the Pipeline Preview API, and greps +/// the result for this marker. +pub struct AdoAwMarkerExtension; + +impl CompilerExtension for AdoAwMarkerExtension { + fn name(&self) -> &str { + "ado-aw-marker" + } + + fn phase(&self) -> ExtensionPhase { + // Tool phase keeps the marker step grouped with the other + // always-on internal extensions (GitHub, SafeOutputs). The + // marker has no execution dependency on anything else; the + // phase choice is purely about emit order. + ExtensionPhase::Tool + } + + fn setup_steps(&self, ctx: &CompileContext) -> Result> { + // In unit-test contexts that build a CompileContext without an + // input_path (e.g. CompileContext::for_test), skip the marker. + // Production paths always populate input_path via + // CompileContext::new. + let Some(input_path) = ctx.input_path else { + return Ok(vec![]); + }; + + let source = super::super::common::normalize_source_path(input_path); + let version = env!("CARGO_PKG_VERSION"); + let target = ctx.front_matter.target.as_str(); + + let metadata_json = serde_json::json!({ + "schema": 1, + "source": source, + "version": version, + "target": target, + }) + .to_string(); + + // The `# ado-aw-metadata:` line is the parse target for + // discovery. The `echo` makes the same information visible in + // the build log at runtime, which is a free human-discoverability + // bonus and costs nothing because the step runs in milliseconds. + // + // The echo's source value goes through two sanitisations: + // + // 1. `sanitize_for_vso_logging` neutralises `##vso[` and `##[` + // prefixes. The ADO build agent scans stdout for those + // sequences and treats them as logging commands (e.g. + // `task.setvariable`). An attacker who controls a markdown + // filename could otherwise inject a logging command into + // the build log via the echoed source path. Same convention + // used by `agent_stats::sanitize_for_markdown`. + // + // 2. `bash_single_quote_escape` applies the `'\''` idiom so a + // filename containing `'` (e.g. `agents/foo's.md`) doesn't + // produce syntactically broken bash. `version` and `target` + // are controlled inputs and can't contain either. + let echo_source = bash_single_quote_escape(&sanitize_for_vso_logging(&source)); + let step = format!( + "- bash: |\n \ + # ado-aw-metadata: {metadata}\n \ + echo 'ado-aw metadata: source={echo_source} version={version} target={target}'\n \ + displayName: \"ado-aw\"\n", + metadata = metadata_json, + echo_source = echo_source, + version = version, + target = target, + ); + + Ok(vec![step]) + } +} + +/// Escape any `'` in `s` so it can be safely embedded inside a single-quoted +/// bash string. Replaces each `'` with `'\''` (close-quote, escaped quote, +/// reopen-quote — the canonical idiom). +fn bash_single_quote_escape(s: &str) -> String { + s.replace('\'', "'\\''") +} + +/// Neutralise ADO build-agent logging-command prefixes (`##vso[`, `##[`). +/// Mirrors `crate::agent_stats::sanitize_for_markdown` so a malicious +/// filename can't smuggle a `task.setvariable` (or similar) through the +/// runtime `echo` line in the marker step. +fn sanitize_for_vso_logging(s: &str) -> String { + s.replace("##vso[", "[vso-filtered][") + .replace("##[", "[filtered][") +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::compile::extensions::CompileContext; + use crate::compile::types::FrontMatter; + use std::path::Path; + + fn parse_fm(yaml: &str) -> FrontMatter { + serde_yaml::from_str(yaml).expect("front matter parses") + } + + #[test] + fn returns_no_step_when_input_path_absent() { + let fm = parse_fm("name: t\ndescription: x\n"); + let ctx = CompileContext::for_test(&fm); + let steps = AdoAwMarkerExtension.setup_steps(&ctx).unwrap(); + assert!(steps.is_empty(), "expected no marker when input_path is None"); + } + + #[test] + fn emits_single_step_with_canonical_displayname() { + // Production path: CompileContext::new populates input_path. + // Simulate by hand for this unit test. + let fm = parse_fm("name: t\ndescription: x\n"); + let input_path = Path::new("agents/foo.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.setup_steps(&ctx).unwrap(); + assert_eq!(steps.len(), 1); + let step = &steps[0]; + assert!(step.contains("displayName: \"ado-aw\""), "step missing displayName:\n{step}"); + assert!(step.contains("# ado-aw-metadata:"), "step missing JSON marker line:\n{step}"); + assert!(step.contains("\"source\":\"agents/foo.md\""), "step missing source field:\n{step}"); + assert!(step.contains("\"target\":\"standalone\""), "step missing target field:\n{step}"); + assert!(step.contains("\"schema\":1"), "step missing schema field:\n{step}"); + } + + #[test] + fn target_field_reflects_front_matter() { + for (raw_target, expected) in [ + ("standalone", "standalone"), + ("1es", "1es"), + ("job", "job"), + ("stage", "stage"), + ] { + let yaml = format!("name: t\ndescription: x\ntarget: {raw_target}\n"); + let fm = parse_fm(&yaml); + let input_path = Path::new("agents/foo.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.setup_steps(&ctx).unwrap(); + assert_eq!(steps.len(), 1, "target={raw_target}"); + assert!( + steps[0].contains(&format!("\"target\":\"{expected}\"")), + "expected target={expected} in step (raw input {raw_target}):\n{}", + steps[0] + ); + } + } + + #[test] + fn bash_single_quote_escape_idiom_is_correct() { + // Standard bash idiom: close-quote, escaped quote, reopen. + assert_eq!(bash_single_quote_escape("a'b"), "a'\\''b"); + assert_eq!(bash_single_quote_escape("''"), "'\\'''\\''"); + assert_eq!(bash_single_quote_escape("plain"), "plain"); + assert_eq!(bash_single_quote_escape(""), ""); + } + + #[test] + fn echo_line_handles_single_quote_in_source_path() { + // A markdown filename with `'` in it must produce syntactically + // valid bash. Without the escape, the generated step would + // break with "unexpected EOF while looking for matching `''". + let fm = parse_fm("name: t\ndescription: x\n"); + let input_path = Path::new("agents/foo's-agent.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.setup_steps(&ctx).unwrap(); + assert_eq!(steps.len(), 1); + let step = &steps[0]; + assert!( + step.contains("echo 'ado-aw metadata: source=agents/foo'\\''s-agent.md "), + "single-quote in source should be escaped via the '\\'' idiom; got:\n{step}", + ); + // The JSON marker line should still carry the raw (un-bash-escaped) + // source — JSON has no quoting concern with `'`. + assert!( + step.contains("\"source\":\"agents/foo's-agent.md\""), + "JSON marker should carry raw source unchanged:\n{step}", + ); + } + + #[test] + fn sanitize_for_vso_logging_neutralises_known_prefixes() { + assert_eq!( + sanitize_for_vso_logging("##vso[task.setvariable variable=X]value"), + "[vso-filtered][task.setvariable variable=X]value" + ); + assert_eq!( + sanitize_for_vso_logging("##[warning]ignore me"), + "[filtered][warning]ignore me" + ); + assert_eq!(sanitize_for_vso_logging("agents/foo.md"), "agents/foo.md"); + assert_eq!(sanitize_for_vso_logging(""), ""); + } + + #[test] + fn echo_line_neutralises_vso_injection_attempt() { + // An attacker who controls a markdown filename must not be able + // to inject ADO logging commands into the build log via the + // echoed source path. The ADO agent scans stdout for `##vso[` + // and `##[` prefixes and treats matching sequences as task + // commands (setvariable, setoutput, etc.). + let fm = parse_fm("name: t\ndescription: x\n"); + let input_path = Path::new("agents/##vso[task.setvariable variable=FOO]value.md"); + let ctx = CompileContext { + agent_name: &fm.name, + front_matter: &fm, + ado_context: None, + engine: crate::engine::Engine::Copilot, + compile_dir: None, + input_path: Some(input_path), + }; + let steps = AdoAwMarkerExtension.setup_steps(&ctx).unwrap(); + assert_eq!(steps.len(), 1); + let step = &steps[0]; + + // Find the `echo` line specifically — the `# ado-aw-metadata` + // JSON line is allowed to carry the raw source (it's not echoed + // to stdout by ADO; it's a comment in the bash heredoc, not + // output at runtime). The JSON line *does* get written to the + // build log when ADO renders the step body, but as `# ...` + // comments inside the rendered yaml; those don't trip the + // logging-command scanner. + let echo_line = step + .lines() + .find(|l| l.trim_start().starts_with("echo 'ado-aw metadata:")) + .expect("must have echo line"); + assert!( + !echo_line.contains("##vso["), + "raw ##vso[ leaked into echo line: {echo_line}" + ); + assert!( + echo_line.contains("[vso-filtered]["), + "expected `##vso[` neutralised to `[vso-filtered][` in echo line: {echo_line}" + ); + } +} diff --git a/src/compile/extensions/mod.rs b/src/compile/extensions/mod.rs index 1d2964f3..efbc35cb 100644 --- a/src/compile/extensions/mod.rs +++ b/src/compile/extensions/mod.rs @@ -109,15 +109,21 @@ pub struct CompileContext<'a> { /// `nuget.config` should be resolved). `None` for unit-test contexts /// where no on-disk repo exists. pub compile_dir: Option<&'a Path>, + /// Path of the input markdown file being compiled (e.g. + /// `agents/release-readiness.md`). `None` for unit-test contexts. + /// Consumed by the always-on `ado-aw-marker` compiler extension to + /// embed source-path metadata in the compiled YAML. + pub input_path: Option<&'a Path>, } impl<'a> CompileContext<'a> { /// Build a fully-resolved compile context. /// /// Resolves the engine implementation from front matter and infers ADO - /// context from the git remote in `compile_dir`. Returns an error if - /// the engine identifier is unsupported. - pub async fn new(front_matter: &'a FrontMatter, compile_dir: &'a Path) -> Result { + /// context from the git remote in the directory containing `input_path`. + /// Returns an error if the engine identifier is unsupported. + pub async fn new(front_matter: &'a FrontMatter, input_path: &'a Path) -> Result { + let compile_dir = input_path.parent().unwrap_or(Path::new(".")); let engine = engine::get_engine(front_matter.engine.engine_id())?; let ado_context = Self::infer_ado_context(compile_dir).await; Ok(Self { @@ -126,6 +132,7 @@ impl<'a> CompileContext<'a> { ado_context, engine, compile_dir: Some(compile_dir), + input_path: Some(input_path), }) } @@ -175,6 +182,7 @@ impl<'a> CompileContext<'a> { ado_context: None, engine: crate::engine::Engine::Copilot, compile_dir: None, + input_path: None, } } @@ -191,6 +199,7 @@ impl<'a> CompileContext<'a> { }), engine: crate::engine::Engine::Copilot, compile_dir: None, + input_path: None, } } @@ -203,6 +212,7 @@ impl<'a> CompileContext<'a> { ado_context: None, engine: crate::engine::Engine::Copilot, compile_dir: Some(compile_dir), + input_path: None, } } } @@ -572,11 +582,13 @@ macro_rules! extension_enum { }; } +mod ado_aw_marker; mod github; mod safe_outputs; pub(crate) mod trigger_filters; // Re-export tool/runtime extensions from their colocated homes +pub use ado_aw_marker::AdoAwMarkerExtension; pub use crate::tools::azure_devops::AzureDevOpsExtension; pub use crate::tools::cache_memory::CacheMemoryExtension; pub use github::GitHubExtension; @@ -593,6 +605,7 @@ extension_enum! { /// Uses static dispatch (no `Box`) — each variant delegates to /// the inner type's [`CompilerExtension`] implementation. pub enum Extension { + AdoAwMarker(AdoAwMarkerExtension), GitHub(GitHubExtension), SafeOutputs(SafeOutputsExtension), Lean(LeanExtension), @@ -624,6 +637,7 @@ pub fn collect_extensions(front_matter: &FrontMatter) -> Vec { let mut extensions = Vec::new(); // ── Always-on internal extensions ── + extensions.push(Extension::AdoAwMarker(AdoAwMarkerExtension)); extensions.push(Extension::GitHub(GitHubExtension)); extensions.push(Extension::SafeOutputs(SafeOutputsExtension)); diff --git a/src/compile/extensions/tests.rs b/src/compile/extensions/tests.rs index 4f182c36..0bd1609c 100644 --- a/src/compile/extensions/tests.rs +++ b/src/compile/extensions/tests.rs @@ -88,8 +88,9 @@ fn test_awf_mount_serde_roundtrip() { fn test_collect_extensions_empty_front_matter() { let fm = minimal_front_matter(); let exts = collect_extensions(&fm); - // Always-on: GitHub + SafeOutputs - assert_eq!(exts.len(), 2); + // Always-on: ado-aw-marker + GitHub + SafeOutputs + assert_eq!(exts.len(), 3); + assert!(exts.iter().any(|e| e.name() == "ado-aw-marker")); assert!(exts.iter().any(|e| e.name() == "GitHub")); assert!(exts.iter().any(|e| e.name() == "SafeOutputs")); } @@ -100,7 +101,7 @@ fn test_collect_extensions_lean_enabled() { parse_markdown("---\nname: test\ndescription: test\nruntimes:\n lean: true\n---\n") .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 3); // GitHub + SafeOutputs + Lean + assert_eq!(exts.len(), 4); // ado-aw-marker + GitHub + SafeOutputs + Lean assert_eq!(exts[0].name(), "Lean 4"); // Runtime phase sorts first } @@ -110,7 +111,7 @@ fn test_collect_extensions_lean_disabled() { parse_markdown("---\nname: test\ndescription: test\nruntimes:\n lean: false\n---\n") .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 2); // Just always-on + assert_eq!(exts.len(), 3); // Just always-on } #[test] @@ -119,7 +120,7 @@ fn test_collect_extensions_azure_devops_enabled() { parse_markdown("---\nname: test\ndescription: test\ntools:\n azure-devops: true\n---\n") .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 3); // GitHub + SafeOutputs + AzureDevOps + assert_eq!(exts.len(), 4); // ado-aw-marker + GitHub + SafeOutputs + AzureDevOps assert!(exts.iter().any(|e| e.name() == "Azure DevOps MCP")); } @@ -129,7 +130,7 @@ fn test_collect_extensions_cache_memory_enabled() { parse_markdown("---\nname: test\ndescription: test\ntools:\n cache-memory: true\n---\n") .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 3); // GitHub + SafeOutputs + CacheMemory + assert_eq!(exts.len(), 4); // ado-aw-marker + GitHub + SafeOutputs + CacheMemory assert!(exts.iter().any(|e| e.name() == "Cache Memory")); } @@ -140,7 +141,7 @@ fn test_collect_extensions_all_enabled() { ) .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 5); // GitHub + SafeOutputs + Lean + AzureDevOps + CacheMemory + assert_eq!(exts.len(), 6); // ado-aw-marker + GitHub + SafeOutputs + Lean + AzureDevOps + CacheMemory assert_eq!(exts[0].name(), "Lean 4"); // Runtime phase first // All tool-phase extensions follow assert!(exts[1..].iter().all(|e| e.phase() == ExtensionPhase::Tool)); @@ -156,7 +157,7 @@ fn test_collect_extensions_runtimes_always_before_tools() { ) .unwrap(); let exts = collect_extensions(&fm); - assert_eq!(exts.len(), 5); // GitHub + SafeOutputs + Lean + AzureDevOps + CacheMemory + assert_eq!(exts.len(), 6); // ado-aw-marker + GitHub + SafeOutputs + Lean + AzureDevOps + CacheMemory // Find the boundary: last Runtime and first Tool let last_runtime_idx = exts diff --git a/src/compile/mod.rs b/src/compile/mod.rs index 1c698cbc..333cb61d 100644 --- a/src/compile/mod.rs +++ b/src/compile/mod.rs @@ -31,6 +31,7 @@ pub use common::parse_markdown; #[allow(unused_imports)] pub use common::{atomic_write, parse_markdown_detailed, reconstruct_source, ParsedSource}; pub use common::HEADER_MARKER; +pub use common::normalize_source_path; pub use common::ADO_MCP_ENTRYPOINT; pub use common::ADO_MCP_IMAGE; pub use common::ADO_MCP_PACKAGE; @@ -198,6 +199,10 @@ async fn compile_pipeline_inner( }; info!("Using {} compiler", compiler.target_name()); + // Snapshot the version in the existing output file before overwriting it. + // Used below to emit an upgrade note when the compiler version changes. + let existing_version = read_existing_pipeline_version(&yaml_output_path).await; + // Compile (no source mutation yet — a failure here must leave the // source byte-identical). let pipeline_yaml = compiler @@ -251,6 +256,21 @@ async fn compile_pipeline_inner( ); } + // Emit an upgrade note when an existing compiled file was produced by + // a different compiler version. This makes version bumps visible in the + // terminal and in CI logs without requiring the user to diff the output. + let current_version = env!("CARGO_PKG_VERSION"); + if let Some(ref old_version) = existing_version { + if old_version != current_version { + println!( + "note: upgraded {} (was v{}, now v{})", + yaml_output_path.display(), + old_version, + current_version, + ); + } + } + // Update .gitattributes at the repo root so every compiled pipeline is // marked as a generated file with `merge=ours`. Best-effort: skip with a // debug-level log when the output is not inside a git repository, since @@ -365,13 +385,35 @@ pub async fn compile_all_pipelines(skip_integrity: bool, debug_pipeline: bool) - return Ok(()); } + let current_version = env!("CARGO_PKG_VERSION"); + + // Count pipelines whose compiled version differs from the current compiler. + let outdated_count = detected + .iter() + .filter(|p| !p.version.is_empty() && p.version != current_version) + .count(); + println!("Found {} agentic pipeline(s):", detected.len()); for p in &detected { + let version_status = if p.version.is_empty() { + "(version unknown)".to_string() + } else if p.version == current_version { + "(up to date)".to_string() + } else { + format!("(out of date, compiled by v{})", p.version) + }; println!( - " {} (source: {}, version: {})", + " {} (source: {}) {}", p.yaml_path.display(), p.source, - p.version + version_status, + ); + } + + if outdated_count > 0 { + println!( + "\n{} pipeline(s) need recompilation with the current compiler (v{}).", + outdated_count, current_version, ); } println!(); @@ -692,6 +734,22 @@ fn format_diff(existing: &str, expected: &str, pipeline_path: &Path) -> String { output } +/// Read the compiler version embedded in an existing compiled pipeline file. +/// +/// Scans the first five lines of `path` for the `# @ado-aw source=… version=…` +/// header written by every compilation. Returns the version string when found, +/// `None` when the file does not exist, is unreadable, or has no recognisable +/// header. +async fn read_existing_pipeline_version(path: &Path) -> Option { + let content = tokio::fs::read_to_string(path).await.ok()?; + content + .lines() + .take(5) + .find_map(crate::detect::parse_header_line) + .filter(|meta| !meta.version.is_empty()) + .map(|meta| meta.version) +} + /// Walk up from `start` to find the nearest directory containing `.git`. fn find_repo_root(start: &Path) -> Option { let mut current = start.to_path_buf(); @@ -1019,4 +1077,70 @@ description: "A test agent for directory output" let _ = std::fs::remove_dir_all(&temp_dir); } + + // ─── read_existing_pipeline_version ───────────────────────────────────── + + #[tokio::test] + async fn read_existing_pipeline_version_returns_none_for_missing_file() { + let version = read_existing_pipeline_version(Path::new("/tmp/does-not-exist.lock.yml")).await; + assert!(version.is_none(), "expected None for a non-existent file"); + } + + #[tokio::test] + async fn read_existing_pipeline_version_returns_none_without_header() { + let temp = std::env::temp_dir().join(format!( + "ado-aw-ver-test-no-header-{}-{}.yml", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + )); + std::fs::write(&temp, "# plain yaml\nname: foo\n").unwrap(); + let version = read_existing_pipeline_version(&temp).await; + assert!(version.is_none(), "expected None when file has no @ado-aw header"); + let _ = std::fs::remove_file(&temp); + } + + #[tokio::test] + async fn read_existing_pipeline_version_extracts_version_from_header() { + let temp = std::env::temp_dir().join(format!( + "ado-aw-ver-test-header-{}-{}.yml", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + )); + // Simulate a compiled lock file header (only the first 5 lines matter). + let header = "# This file is auto-generated by ado-aw.\n\ + # @ado-aw source=\"agents/my-agent.md\" version=0.28.0\n\ + name: my-pipeline\n"; + std::fs::write(&temp, header).unwrap(); + let version = read_existing_pipeline_version(&temp).await; + assert_eq!(version.as_deref(), Some("0.28.0")); + let _ = std::fs::remove_file(&temp); + } + + #[tokio::test] + async fn read_existing_pipeline_version_returns_none_for_empty_version() { + let temp = std::env::temp_dir().join(format!( + "ado-aw-ver-test-empty-ver-{}-{}.yml", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + )); + // Header with source but no version field. + let header = "# @ado-aw source=\"agents/my-agent.md\"\n"; + std::fs::write(&temp, header).unwrap(); + let version = read_existing_pipeline_version(&temp).await; + assert!( + version.is_none(), + "expected None when version field is absent; got {:?}", + version + ); + let _ = std::fs::remove_file(&temp); + } } diff --git a/src/compile/onees.rs b/src/compile/onees.rs index ee45e31b..558ccbc4 100644 --- a/src/compile/onees.rs +++ b/src/compile/onees.rs @@ -47,8 +47,7 @@ impl Compiler for OneESCompiler { let extensions = super::extensions::collect_extensions(front_matter); // Build compile context for MCPG config generation - let input_dir = input_path.parent().unwrap_or(Path::new(".")); - let ctx = super::extensions::CompileContext::new(front_matter, input_dir).await?; + let ctx = super::extensions::CompileContext::new(front_matter, input_path).await?; // Generate values shared with standalone that are passed as extra replacements let allowed_domains = generate_allowed_domains(front_matter, &extensions)?; @@ -66,7 +65,7 @@ impl Compiler for OneESCompiler { // Generate 1ES-specific setup/teardown jobs(no per-job pool, uses templateContext). // These override the shared {{ setup_job }} / {{ teardown_job }} markers via // extra_replacements, which are applied before the shared replacements. - let setup_job = generate_setup_job(&front_matter.setup); + let setup_job = generate_setup_job(&front_matter.setup, &extensions, &ctx)?; let teardown_job = generate_teardown_job(&front_matter.teardown); let config = CompileConfig { @@ -102,14 +101,45 @@ impl Compiler for OneESCompiler { /// Generate setup job for 1ES template. /// Unlike standalone, 1ES jobs don't have per-job `pool:` — the pool is at /// the top-level `parameters.pool`. Jobs use `templateContext: type: buildJob`. -fn generate_setup_job(setup_steps: &[serde_yaml::Value]) -> String { - if setup_steps.is_empty() { - return String::new(); +/// +/// Extension `setup_steps()` are injected before user setup steps (mirrors the +/// shared `generate_setup_job` in common.rs). The always-on ado-aw-marker +/// extension is the primary contributor; user setup_steps are appended after. +fn generate_setup_job( + setup_steps: &[serde_yaml::Value], + extensions: &[super::extensions::Extension], + ctx: &super::extensions::CompileContext, +) -> anyhow::Result { + use super::extensions::CompilerExtension; + + // Collect setup_steps from ALL extensions + let mut ext_setup_steps: Vec = Vec::new(); + for ext in extensions { + ext_setup_steps.extend(ext.setup_steps(ctx)?); } - let steps_yaml = format_steps_yaml_indented(setup_steps, 6); + if setup_steps.is_empty() && ext_setup_steps.is_empty() { + return Ok(String::new()); + } - format!( + // Steps in the 1ES templateContext.steps block are indented 6 spaces. + let mut body = String::new(); + + if !ext_setup_steps.is_empty() { + let ext_steps_combined = ext_setup_steps.join("\n\n"); + let indented = indent_block(&ext_steps_combined, " "); + body.push_str(&indented); + if !body.ends_with('\n') { + body.push('\n'); + } + } + + if !setup_steps.is_empty() { + let user_steps_yaml = format_steps_yaml_indented(setup_steps, 6); + body.push_str(&user_steps_yaml); + } + + Ok(format!( r#"- job: Setup displayName: "Setup" templateContext: @@ -118,8 +148,23 @@ fn generate_setup_job(setup_steps: &[serde_yaml::Value]) -> String { - checkout: self {} "#, - steps_yaml - ) + body.trim_end_matches('\n') + )) +} + +/// Indent every non-empty line in `block` with `prefix`. +fn indent_block(block: &str, prefix: &str) -> String { + block + .lines() + .map(|line| { + if line.is_empty() { + String::new() + } else { + format!("{prefix}{line}") + } + }) + .collect::>() + .join("\n") } /// Generate teardown job for 1ES template. @@ -153,17 +198,27 @@ mod tests { #[test] fn test_generate_setup_job_empty_steps() { - let result = generate_setup_job(&[]); - assert!(result.is_empty(), "Empty setup steps should return empty string"); + let fm = parse_test_fm("name: t\ndescription: x\n"); + let ctx = super::super::extensions::CompileContext::for_test(&fm); + let result = generate_setup_job(&[], &[], &ctx).expect("call ok"); + assert!( + result.is_empty(), + "Empty setup steps with no extensions should return empty string" + ); } #[test] fn test_generate_setup_job_with_steps() { let step: serde_yaml::Value = serde_yaml::from_str("bash: echo setup").expect("valid yaml"); - let result = generate_setup_job(&[step]); + let fm = parse_test_fm("name: t\ndescription: x\n"); + let ctx = super::super::extensions::CompileContext::for_test(&fm); + let result = generate_setup_job(&[step], &[], &ctx).expect("call ok"); assert!(result.contains("Setup"), "Should define a Setup job"); - assert!(result.contains("displayName: \"Setup\""), "Should use simple display name"); + assert!( + result.contains("displayName: \"Setup\""), + "Should use simple display name" + ); assert!(result.contains("checkout: self"), "Should include self checkout"); assert!(result.contains("echo setup"), "Should include the step content"); assert!(result.contains("templateContext"), "Should include templateContext"); @@ -171,6 +226,10 @@ mod tests { assert!(!result.contains("pool:"), "Should not include per-job pool"); } + fn parse_test_fm(yaml: &str) -> crate::compile::types::FrontMatter { + serde_yaml::from_str(yaml).expect("parse fm") + } + // ─── generate_teardown_job ─────────────────────────────────────────────── #[test] diff --git a/src/compile/standalone.rs b/src/compile/standalone.rs index 063c7dff..1cd44565 100644 --- a/src/compile/standalone.rs +++ b/src/compile/standalone.rs @@ -48,8 +48,7 @@ impl Compiler for StandaloneCompiler { let extensions = super::extensions::collect_extensions(front_matter); // Build compile context for MCPG config generation - let input_dir = input_path.parent().unwrap_or(std::path::Path::new(".")); - let ctx = super::extensions::CompileContext::new(front_matter, input_dir).await?; + let ctx = super::extensions::CompileContext::new(front_matter, input_path).await?; // Standalone-specific values let allowed_domains = generate_allowed_domains(front_matter, &extensions)?; diff --git a/src/compile/types.rs b/src/compile/types.rs index 02acc7ee..bb44ce4b 100644 --- a/src/compile/types.rs +++ b/src/compile/types.rs @@ -23,6 +23,20 @@ pub enum CompileTarget { Stage, } +impl CompileTarget { + /// Canonical lowercase string for this target (matches the serde rename). + /// Used by the always-on ado-aw-marker extension when emitting the + /// machine-readable metadata blob. + pub fn as_str(&self) -> &'static str { + match self { + Self::Standalone => "standalone", + Self::OneES => "1es", + Self::Job => "job", + Self::Stage => "stage", + } + } +} + /// Pool configuration - accepts both string and object formats /// /// Examples: diff --git a/src/detect.rs b/src/detect.rs index 16eb33df..3d47f75f 100644 --- a/src/detect.rs +++ b/src/detect.rs @@ -5,6 +5,7 @@ use anyhow::{Context, Result}; use log::{debug, info}; +use serde::Deserialize; use std::path::{Path, PathBuf}; use tokio::io::AsyncBufReadExt; @@ -124,6 +125,84 @@ async fn try_detect_pipeline( Ok(None) } +/// Prefix of the machine-readable marker emitted into every compiled +/// pipeline by the always-on `ado-aw-marker` compiler extension. +/// +/// The marker is a `# ado-aw-metadata: { … JSON … }` line embedded +/// inside the bash body of an injected Setup-job step. The step body +/// (unlike top-of-file YAML comments) survives ADO Pipeline Preview +/// expansion, so this prefix is the canonical surface project-scope +/// discovery searches for in expanded YAML. +pub const MARKER_STEP_PREFIX: &str = "# ado-aw-metadata:"; + +/// Parsed metadata from a `# ado-aw-metadata: {…}` marker step line. +/// +/// The schema is forward-compatible: unknown JSON fields are ignored, +/// and missing fields fall through to defaults (empty string / zero). +/// Callers that need a specific field should check it explicitly. +#[derive(Debug, Clone, Deserialize, PartialEq, Eq)] +pub struct MarkerMetadata { + /// Schema version; `1` for the initial release. + #[serde(default)] + pub schema: u32, + /// Source markdown path, normalised forward-slash form (e.g. + /// `agents/release-readiness.md`). + #[serde(default)] + pub source: String, + /// Compiler version that produced this YAML (`CARGO_PKG_VERSION`). + #[serde(default)] + pub version: String, + /// Compile target: `standalone` / `1es` / `job` / `stage`. + #[serde(default)] + pub target: String, +} + +/// Scan raw pipeline YAML for `# ado-aw-metadata: {…}` marker lines. +/// +/// Returns one [`MarkerMetadata`] per parseable hit. Multiple hits in a +/// single document indicate a consumer pipeline that includes more than +/// one ado-aw-generated template; project-scope discovery uses that to +/// classify the definition as `Consumer`. Malformed JSON entries are +/// skipped (logged at debug) rather than panicking — defensive against +/// future schema drift inside the JSON blob. +/// +/// Designed to be called against either: +/// - Raw compiled-on-disk lock-file YAML, or +/// - The `finalYaml` returned by ADO's Pipeline Preview API (which +/// strips top-of-file comments but preserves step bodies). +pub fn parse_marker_step(yaml: &str) -> Vec { + let mut results = Vec::new(); + + for line in yaml.lines() { + // Trim leading whitespace; the marker lives inside an indented + // bash heredoc, so the actual `#` may be indented arbitrarily. + let trimmed = line.trim_start(); + let Some(rest) = trimmed.strip_prefix(MARKER_STEP_PREFIX) else { + continue; + }; + + let json_str = rest.trim(); + if !json_str.starts_with('{') { + // Marker prefix matched but no JSON payload — likely + // documentation or a non-marker comment. Skip silently. + continue; + } + + match serde_json::from_str::(json_str) { + Ok(meta) => results.push(meta), + Err(e) => { + log::debug!( + "Skipping malformed ado-aw-metadata marker: {} (payload: {})", + e, + json_str + ); + } + } + } + + results +} + /// Parsed metadata from a `# @ado-aw` header line. pub struct HeaderMetadata { pub source: String, @@ -201,6 +280,86 @@ pub fn parse_header_line(line: &str) -> Option { mod tests { use super::*; + #[test] + fn test_parse_marker_step_single() { + let yaml = "\ +- bash: | + # ado-aw-metadata: {\"schema\":1,\"source\":\"agents/foo.md\",\"version\":\"0.28.0\",\"target\":\"standalone\"} + echo hello + displayName: \"ado-aw\" +"; + let metas = parse_marker_step(yaml); + assert_eq!(metas.len(), 1); + assert_eq!(metas[0].schema, 1); + assert_eq!(metas[0].source, "agents/foo.md"); + assert_eq!(metas[0].version, "0.28.0"); + assert_eq!(metas[0].target, "standalone"); + } + + #[test] + fn test_parse_marker_step_multiple() { + // A consumer pipeline pulling in two templates expands to two + // marker steps in finalYaml. + let yaml = "\ +jobs: + - job: a + steps: + - bash: | + # ado-aw-metadata: {\"schema\":1,\"source\":\"agents/a.md\",\"version\":\"0.28.0\",\"target\":\"job\"} + echo a + displayName: \"ado-aw\" + - job: b + steps: + - bash: | + # ado-aw-metadata: {\"schema\":1,\"source\":\"agents/b.md\",\"version\":\"0.27.0\",\"target\":\"job\"} + echo b + displayName: \"ado-aw\" +"; + let metas = parse_marker_step(yaml); + assert_eq!(metas.len(), 2); + let sources: Vec<&str> = metas.iter().map(|m| m.source.as_str()).collect(); + assert!(sources.contains(&"agents/a.md")); + assert!(sources.contains(&"agents/b.md")); + } + + #[test] + fn test_parse_marker_step_no_match_returns_empty() { + let yaml = "name: foo\njobs:\n - job: x\n steps:\n - bash: echo hi\n"; + assert!(parse_marker_step(yaml).is_empty()); + } + + #[test] + fn test_parse_marker_step_skips_malformed_json() { + // The prefix matches but the JSON is broken; the parser must + // skip silently rather than panic. A valid marker on a later + // line is still returned. + let yaml = "\ +- bash: | + # ado-aw-metadata: {not valid json + # ado-aw-metadata: {\"schema\":1,\"source\":\"agents/ok.md\",\"version\":\"1.0.0\",\"target\":\"stage\"} +"; + let metas = parse_marker_step(yaml); + assert_eq!(metas.len(), 1); + assert_eq!(metas[0].source, "agents/ok.md"); + } + + #[test] + fn test_parse_marker_step_tolerates_extra_json_fields() { + // Forward-compatibility: an unknown future field shouldn't + // break the parser. + let yaml = " # ado-aw-metadata: {\"schema\":2,\"source\":\"agents/x.md\",\"version\":\"1.0.0\",\"target\":\"standalone\",\"future_field\":[1,2,3]}\n"; + let metas = parse_marker_step(yaml); + assert_eq!(metas.len(), 1); + assert_eq!(metas[0].source, "agents/x.md"); + assert_eq!(metas[0].schema, 2); + } + + #[test] + fn test_parse_marker_step_ignores_prefix_without_json() { + let yaml = "# ado-aw-metadata: (manual documentation note, not a marker)\n"; + assert!(parse_marker_step(yaml).is_empty()); + } + #[test] fn test_parse_header_line_valid() { let line = "# @ado-aw source=agents/my-agent.md version=0.3.2"; diff --git a/src/enable.rs b/src/enable.rs index 726971d7..e2483516 100644 --- a/src/enable.rs +++ b/src/enable.rs @@ -453,6 +453,8 @@ mod tests { }), queue_status: status.map(str::to_string), path: None, + repository: None, + revision: None, } } diff --git a/src/list.rs b/src/list.rs index 03a00217..556066f5 100644 --- a/src/list.rs +++ b/src/list.rs @@ -313,6 +313,8 @@ mod tests { }), queue_status: status.map(str::to_string), path: folder.map(str::to_string), + repository: None, + revision: None, } } diff --git a/src/main.rs b/src/main.rs index faacb3db..b06f35f1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -63,6 +63,18 @@ enum SecretsCmd { /// Explicit definition IDs (skips local-fixture auto-detection). #[arg(long, value_delimiter = ',')] definition_ids: Option>, + /// Use Preview-driven discovery against every definition in the + /// project (not just those in the current repo). Implies the + /// discovery code path; ignores local lock files for matching. + #[arg(long, conflicts_with = "definition_ids")] + all_repos: bool, + /// Filter discovered definitions to consumers of one specific + /// ado-aw template (e.g. `agents/security-scan.md`). Activates + /// the discovery code path. **Without `--all-repos`, only + /// definitions in the current repository are searched** — pair + /// with `--all-repos` to search the full project. + #[arg(long, conflicts_with = "definition_ids")] + source: Option, }, /// List variable names + flags on every matched definition. Never prints values. List { @@ -77,6 +89,15 @@ enum SecretsCmd { json: bool, #[arg(long, value_delimiter = ',')] definition_ids: Option>, + /// Use Preview-driven discovery against every definition in the + /// project (not just those in the current repo). + #[arg(long, conflicts_with = "definition_ids")] + all_repos: bool, + /// Filter discovered definitions to consumers of one specific + /// ado-aw template. **Without `--all-repos`, only definitions + /// in the current repository are searched.** + #[arg(long, conflicts_with = "definition_ids")] + source: Option, }, /// Delete a named variable from every matched definition. Delete { @@ -92,6 +113,15 @@ enum SecretsCmd { dry_run: bool, #[arg(long, value_delimiter = ',')] definition_ids: Option>, + /// Use Preview-driven discovery against every definition in the + /// project. + #[arg(long, conflicts_with = "definition_ids")] + all_repos: bool, + /// Filter discovered definitions to consumers of one specific + /// ado-aw template. **Without `--all-repos`, only definitions + /// in the current repository are searched.** + #[arg(long, conflicts_with = "definition_ids")] + source: Option, }, } @@ -892,6 +922,8 @@ async fn main() -> Result<()> { value_stdin, dry_run, definition_ids, + all_repos, + source, } => { secrets::run_set(secrets::SetOptions { name: &name, @@ -904,6 +936,8 @@ async fn main() -> Result<()> { value_stdin, dry_run, definition_ids: definition_ids.as_deref(), + all_repos, + source: source.as_deref(), }) .await?; } @@ -914,6 +948,8 @@ async fn main() -> Result<()> { pat, json, definition_ids, + all_repos, + source, } => { secrets::run_list(secrets::ListOptions { org: org.as_deref(), @@ -922,6 +958,8 @@ async fn main() -> Result<()> { path: path.as_deref(), json, definition_ids: definition_ids.as_deref(), + all_repos, + source: source.as_deref(), }) .await?; } @@ -933,6 +971,8 @@ async fn main() -> Result<()> { pat, dry_run, definition_ids, + all_repos, + source, } => { secrets::run_delete(secrets::DeleteOptions { name: &name, @@ -942,6 +982,8 @@ async fn main() -> Result<()> { path: path.as_deref(), dry_run, definition_ids: definition_ids.as_deref(), + all_repos, + source: source.as_deref(), }) .await?; } diff --git a/src/secrets.rs b/src/secrets.rs index b984be3b..9dd10364 100644 --- a/src/secrets.rs +++ b/src/secrets.rs @@ -22,6 +22,7 @@ use crate::ado::{ AdoAuth, AdoContext, MatchedDefinition, PATH_SEGMENT, get_definition_full, normalize_masked_secret_variable_values, resolve_ado_context, resolve_auth, resolve_definitions, + discovery::{DiscoveryScope, resolve_definitions_via_discovery}, }; /// Description of one pipeline variable, for listing only. @@ -174,6 +175,87 @@ pub struct SetOptions<'a> { pub value_stdin: bool, pub dry_run: bool, pub definition_ids: Option<&'a [u64]>, + /// Use Preview-driven discovery across every definition in the + /// project (not just those whose root YAML is a local lock file). + pub all_repos: bool, + /// Filter discovery results to consumers of one specific ado-aw + /// template source path (e.g. `agents/security-scan.md`). When set + /// alongside `all_repos=false`, scopes discovery to the current repo. + pub source: Option<&'a str>, +} + +/// Decide between the legacy lexical resolver and Preview-driven +/// discovery based on which flags the caller passed. Returns +/// `Ok(Some(vec))` on success, `Ok(None)` only when the legacy path +/// signaled "no local fixtures found; exit clean". +async fn resolve_for_command( + client: &reqwest::Client, + ado_ctx: &AdoContext, + auth: &AdoAuth, + definition_ids: Option<&[u64]>, + all_repos: bool, + source_filter: Option<&str>, + repo_path: &Path, +) -> Result>> { + // Discovery code path: activated by --all-repos or --source. + // Explicit definition_ids always takes precedence (escape hatch). + if definition_ids.is_none() && (all_repos || source_filter.is_some()) { + // CurrentRepo scope without a resolvable git remote is a + // user-friendly footgun: discovery would silently return zero + // results. Surface a targeted hint *before* spending an HTTP + // round-trip on a doomed listing. + if !all_repos && ado_ctx.repo_name.is_empty() { + anyhow::bail!( + "--source filters by the current repository, but no Azure DevOps git remote \ + was detected in `{}`.\n\ + Either run from a checkout of an ADO repo, or pass --all-repos to search \ + the entire project.", + repo_path.display() + ); + } + + let scope = if all_repos { + DiscoveryScope::AllRepos + } else { + DiscoveryScope::CurrentRepo + }; + + // Feed local lock files into discovery so its yamlFilename + // fast-path can skip a Preview call per locally-compiled + // pipeline. Best-effort: scan failures aren't fatal — discovery + // simply falls back to Preview for everything. + let local_lock_paths: Vec = match crate::detect::detect_pipelines(repo_path).await + { + Ok(detected) => detected.into_iter().map(|p| p.yaml_path).collect(), + Err(e) => { + log::debug!( + "Local-fixture scan failed during discovery ({e}); falling back to Preview \ + for every definition." + ); + Vec::new() + } + }; + let local_lock_slice = if local_lock_paths.is_empty() { + None + } else { + Some(local_lock_paths.as_slice()) + }; + + let matched = resolve_definitions_via_discovery( + client, + ado_ctx, + auth, + scope, + local_lock_slice, + source_filter, + ) + .await?; + return Ok(Some(matched)); + } + + // Legacy behaviour: explicit --definition-ids, or local-fixture + // lexical matching. Unchanged from before the discovery work. + resolve_definitions(client, ado_ctx, auth, definition_ids, repo_path).await } pub async fn run_set(opts: SetOptions<'_>) -> Result<()> { @@ -197,11 +279,13 @@ pub async fn run_set(opts: SetOptions<'_>) -> Result<()> { .build() .context("Failed to create HTTP client")?; - let Some(matched) = resolve_definitions( + let Some(matched) = resolve_for_command( &client, &ado_ctx, &auth, opts.definition_ids, + opts.all_repos, + opts.source, &repo_path, ) .await? @@ -210,10 +294,12 @@ pub async fn run_set(opts: SetOptions<'_>) -> Result<()> { }; if matched.is_empty() { - anyhow::bail!( - "No ADO definitions matched any local fixture. Run `ado-aw list` to \ - diagnose." - ); + let hint = if opts.all_repos || opts.source.is_some() { + "No ado-aw pipelines found via Preview-driven discovery. Run `ado-aw list --all-repos` to diagnose." + } else { + "No ADO definitions matched any local fixture. Run `ado-aw list` to diagnose, or try `--all-repos`." + }; + anyhow::bail!("{hint}"); } print_matched_summary(&matched); @@ -329,6 +415,8 @@ pub struct ListOptions<'a> { pub path: Option<&'a Path>, pub json: bool, pub definition_ids: Option<&'a [u64]>, + pub all_repos: bool, + pub source: Option<&'a str>, } pub async fn run_list(opts: ListOptions<'_>) -> Result<()> { @@ -349,11 +437,13 @@ pub async fn run_list(opts: ListOptions<'_>) -> Result<()> { .build() .context("Failed to create HTTP client")?; - let Some(matched) = resolve_definitions( + let Some(matched) = resolve_for_command( &client, &ado_ctx, &auth, opts.definition_ids, + opts.all_repos, + opts.source, &repo_path, ) .await? @@ -362,10 +452,12 @@ pub async fn run_list(opts: ListOptions<'_>) -> Result<()> { }; if matched.is_empty() { - anyhow::bail!( - "No ADO definitions matched any local fixture. Run `ado-aw list` to \ - diagnose." - ); + let hint = if opts.all_repos || opts.source.is_some() { + "No ado-aw pipelines found via Preview-driven discovery." + } else { + "No ADO definitions matched any local fixture. Run `ado-aw list` to diagnose, or try `--all-repos`." + }; + anyhow::bail!("{hint}"); } let mut payload = serde_json::json!({}); @@ -414,6 +506,8 @@ pub struct DeleteOptions<'a> { pub path: Option<&'a Path>, pub dry_run: bool, pub definition_ids: Option<&'a [u64]>, + pub all_repos: bool, + pub source: Option<&'a str>, } pub async fn run_delete(opts: DeleteOptions<'_>) -> Result<()> { @@ -436,11 +530,13 @@ pub async fn run_delete(opts: DeleteOptions<'_>) -> Result<()> { .build() .context("Failed to create HTTP client")?; - let Some(matched) = resolve_definitions( + let Some(matched) = resolve_for_command( &client, &ado_ctx, &auth, opts.definition_ids, + opts.all_repos, + opts.source, &repo_path, ) .await? @@ -449,10 +545,12 @@ pub async fn run_delete(opts: DeleteOptions<'_>) -> Result<()> { }; if matched.is_empty() { - anyhow::bail!( - "No ADO definitions matched any local fixture. Run `ado-aw list` to \ - diagnose." - ); + let hint = if opts.all_repos || opts.source.is_some() { + "No ado-aw pipelines found via Preview-driven discovery." + } else { + "No ADO definitions matched any local fixture. Run `ado-aw list` to diagnose, or try `--all-repos`." + }; + anyhow::bail!("{hint}"); } print_matched_summary(&matched); @@ -547,6 +645,8 @@ pub async fn run_set_github_token( value_stdin: false, dry_run, definition_ids, + all_repos: false, + source: None, }) .await } @@ -604,6 +704,48 @@ mod tests { assert_eq!(out["variables"]["FOO"]["allowOverride"], false); } + // ============ resolve_for_command precondition ============ + + #[tokio::test] + async fn source_without_all_repos_bails_when_no_git_remote() { + // CurrentRepo scope + empty repo_name = no git remote was + // detected. `--source` users hitting this case must get a + // targeted error mentioning `--all-repos`, not a generic + // "no pipelines found" further down the pipeline. + let ctx = AdoContext { + org_url: "https://dev.azure.com/example".to_string(), + project: "p".to_string(), + repo_name: String::new(), + }; + let auth = AdoAuth::Pat("token".to_string()); + let client = reqwest::Client::builder() + .build() + .expect("client builds"); + let tmp = tempfile::tempdir().unwrap(); + + let err = resolve_for_command( + &client, + &ctx, + &auth, + None, + false, + Some("agents/foo.md"), + tmp.path(), + ) + .await + .expect_err("expected bail"); + + let msg = format!("{err}"); + assert!( + msg.contains("--all-repos"), + "error should suggest --all-repos; got: {msg}" + ); + assert!( + msg.contains("no Azure DevOps git remote"), + "error should explain the root cause; got: {msg}" + ); + } + #[test] fn set_preserves_other_variables() { let def = serde_json::json!({ diff --git a/tests/compiler_tests.rs b/tests/compiler_tests.rs index 1ced57d8..e0a7aca8 100644 --- a/tests/compiler_tests.rs +++ b/tests/compiler_tests.rs @@ -3673,6 +3673,81 @@ fn assert_valid_yaml(compiled: &str, fixture_name: &str) { ); } +// ─── ado-aw marker step (always-on extension) ─────────────────────────────── + +/// Assert that compiled YAML carries exactly one `# ado-aw-metadata: {…}` +/// marker line whose JSON includes the expected source path and target. +/// +/// The marker step is injected by the always-on `ado-aw-marker` compiler +/// extension and is the canonical surface project-scope discovery uses +/// to find ado-aw pipelines in expanded YAML (see `src/detect.rs::parse_marker_step`). +fn assert_marker_step_present( + compiled: &str, + expected_source_suffix: &str, + expected_target: &str, + fixture_name: &str, +) { + let marker_lines: Vec<&str> = compiled + .lines() + .filter(|l| l.trim_start().starts_with("# ado-aw-metadata:")) + .collect(); + assert_eq!( + marker_lines.len(), + 1, + "{fixture_name}: expected exactly one '# ado-aw-metadata:' marker in compiled YAML, found {}\nLines: {:#?}", + marker_lines.len(), + marker_lines, + ); + let line = marker_lines[0]; + assert!( + line.contains(&format!("\"target\":\"{expected_target}\"")), + "{fixture_name}: marker line does not declare target={expected_target}: {line}" + ); + assert!( + line.contains("\"schema\":1"), + "{fixture_name}: marker line missing schema=1: {line}" + ); + assert!( + line.contains(&format!("\"source\":\"")) && line.contains(expected_source_suffix), + "{fixture_name}: marker line does not include source suffix {expected_source_suffix}: {line}" + ); + // The runtime echo on the next line should mirror the same data + // (this is the human-facing build-log surface). + assert!( + compiled.contains("ado-aw metadata: source="), + "{fixture_name}: compiled YAML missing runtime echo line for ado-aw marker" + ); + // displayName: "ado-aw" identifies the injected step uniquely. + assert!( + compiled.contains("displayName: \"ado-aw\""), + "{fixture_name}: compiled YAML missing displayName: \"ado-aw\" on injected step" + ); +} + +#[test] +fn test_marker_step_present_in_standalone_target() { + let compiled = compile_fixture("minimal-agent.md"); + assert_marker_step_present(&compiled, "minimal-agent.md", "standalone", "minimal-agent.md"); +} + +#[test] +fn test_marker_step_present_in_1es_target() { + let compiled = compile_fixture("1es-test-agent.md"); + assert_marker_step_present(&compiled, "1es-test-agent.md", "1es", "1es-test-agent.md"); +} + +#[test] +fn test_marker_step_present_in_job_target() { + let compiled = compile_fixture("job-agent.md"); + assert_marker_step_present(&compiled, "job-agent.md", "job", "job-agent.md"); +} + +#[test] +fn test_marker_step_present_in_stage_target() { + let compiled = compile_fixture("stage-agent.md"); + assert_marker_step_present(&compiled, "stage-agent.md", "stage", "stage-agent.md"); +} + /// Test that the 1ES fixture produces valid YAML with correct structure #[test] fn test_1es_compiled_output_is_valid_yaml() {