From 0f6ffc17b8e63d284410908a6a744ee7d9e1c18c Mon Sep 17 00:00:00 2001 From: Abbas Jafari Date: Tue, 19 May 2026 09:31:55 +0200 Subject: [PATCH] feat(marketplace): accept host-prefixed and URL source forms with per-host auth --- CHANGELOG.md | 4 + .../content/docs/reference/manifest-schema.md | 14 +- .../skills/apm-usage/package-authoring.md | 13 + src/apm_cli/marketplace/builder.py | 154 ++++++- src/apm_cli/marketplace/output_mappers.py | 23 +- src/apm_cli/marketplace/publisher.py | 21 +- src/apm_cli/marketplace/yml_editor.py | 11 +- src/apm_cli/marketplace/yml_schema.py | 75 +++- tests/unit/marketplace/test_builder.py | 381 ++++++++++++++++++ tests/unit/marketplace/test_yml_editor.py | 22 + tests/unit/marketplace/test_yml_schema.py | 184 +++++++++ 11 files changed, 860 insertions(+), 42 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1110f2352..d23561cfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- `marketplace.packages[].source` in `apm.yml` now accepts non-default git hosts via the `host.tld/owner/repo` shorthand or the full `https://host.tld/owner/repo[.git]` URL; per-host auth flows through the standard APM token chain. Unlocks GitHub Enterprise and self-hosted GitLab as first-class marketplace package sources. (#1288) + ### Fixed - Copilot, Codex, Cursor, Claude, Windsurf, OpenCode, and Gemini adapters handle MCP v0.1 `runtimeArguments`/`packageArguments` with `variables` (no `type` key), matching the VS Code fix from #1444. (#1461, closes #1452, thanks @sergio-sisternes-epam) diff --git a/docs/src/content/docs/reference/manifest-schema.md b/docs/src/content/docs/reference/manifest-schema.md index 2d53f7eec..f0b88da36 100644 --- a/docs/src/content/docs/reference/manifest-schema.md +++ b/docs/src/content/docs/reference/manifest-schema.md @@ -605,7 +605,7 @@ Each entry MUST be a mapping. Unknown keys are rejected. 
| Field | Type | Required | Description | |---|---|---|---| | `name` | `string` | REQUIRED | Package identifier as it appears in the marketplace. | -| `source` | `string` | REQUIRED | `<owner>/<repo>` (remote) or `./<path>` (local). Must match the source pattern; path traversal (`..`) is refused. | +| `source` | `string` | REQUIRED | One of: `<owner>/<repo>` (remote on the default host), `<host>/<owner>/<repo>` (remote on a non-default host such as GitHub Enterprise or self-hosted GitLab -- shorthand), `https://<host>/<owner>/<repo>[.git]` (same, full URL form -- a trailing `.git` is stripped), or `./<path>` (local). Must match the source pattern; path traversal (`..`) is refused, and URL forms with userinfo (`user@host`), ports, query strings, or non-`https` schemes are rejected. | | `subdir` | `string` | OPTIONAL | Subdirectory inside the source repo. Path-traversal-validated. Ignored for local sources. | | `version` | `string` | Conditional | Semver range (e.g. `^1.0.0`, `~2.1.0`, `>=3.0`). Stored as a string; resolution happens at pack time. REQUIRED for remote packages unless `ref` is given. | | `ref` | `string` | Conditional | Explicit git ref (SHA, tag, or branch). Overrides `version` range when both are present. REQUIRED for remote packages unless `version` is given. | @@ -621,6 +621,10 @@ Each entry MUST be a mapping. Unknown keys are rejected. Remote packages MUST declare at least one of `version` or `ref`. Local packages (sources beginning with `./`) skip git resolution and have no version requirement. +The first three `source` forms target a remote git host; the second and third name a non-default host (e.g. GitHub Enterprise, self-hosted GitLab) as either a shorthand or a full HTTPS URL with an optional `.git` suffix that is normalized away. Path traversal (`..`) in local paths, userinfo (`user@host`), ports, query strings, and non-`https` URL schemes are rejected at parse time. 
+ +Non-default hosts authenticate via the standard APM token chain -- see the [authentication guide](../../getting-started/authentication/) for the per-host-class lookup order. A token resolved for the default host is never forwarded to a non-default host. + ### 7.6. Complete Marketplace Block ```yaml @@ -653,6 +657,14 @@ marketplace: - name: local-tool # local-path package source: ./packages/local-tool description: Vendored tool + + - name: enterprise-agents # GHE shorthand + source: ghe.corp.example.com/platform/agents + version: "^0.3.0" + + - name: gitlab-helper # full URL form + source: https://gitlab.corp.example.com/team/helper.git + ref: v1.2.0 ``` The legacy standalone `marketplace.yml` (top-level keys, no `marketplace:` wrapper) is still loadable but deprecated; new repositories SHOULD use the in-`apm.yml` form scaffolded by `apm marketplace init`. diff --git a/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md b/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md index 1dcf070f7..c5664fa11 100644 --- a/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md +++ b/packages/apm-guide/.apm/skills/apm-usage/package-authoring.md @@ -333,6 +333,13 @@ marketplace: description: Plugin shipped alongside this repo source: ./plugins/local-tool # local path (no remote fetch) version: 0.1.0 + + - name: enterprise-plugin + description: Hosted on GitHub Enterprise + source: ghe.corp.example.com/platform/agents # host.tld/owner/repo + version: "^0.3.0" + # Equivalent full URL form (trailing .git is stripped): + # source: https://ghe.corp.example.com/platform/agents.git ``` Schema rules: @@ -342,6 +349,12 @@ Schema rules: - `ref` takes precedence over `version`. - `source: ./...` marks a local-path entry: skips git resolution, emits the path verbatim into `marketplace.json`. 
+- `source` accepts three remote forms: `owner/repo` (default host), + `host.tld/owner/repo` (non-default host shorthand), or + `https://host.tld/owner/repo[.git]` (full URL). Non-default hosts + resolve auth via the standard APM token chain + (`docs/getting-started/authentication.md`); the default-host token is + never forwarded. - `versioning.strategy` is optional. When present, it is consumed by the `apm pack --check-versions` release gate to enforce alignment between each local package's `version:` field and the marketplace diff --git a/src/apm_cli/marketplace/builder.py b/src/apm_cli/marketplace/builder.py index ac8353af4..7a3d100d2 100644 --- a/src/apm_cli/marketplace/builder.py +++ b/src/apm_cli/marketplace/builder.py @@ -20,6 +20,7 @@ import json import logging import re +import threading import urllib.error import urllib.request from concurrent.futures import ThreadPoolExecutor, as_completed @@ -92,6 +93,7 @@ class ResolvedPackage: requested_version: str | None # original APM-only range (for diagnostics) tags: tuple[str, ...] is_prerelease: bool # True if the resolved ref was a prerelease semver + host: str | None = None # non-default git host parsed from apm.yml source @dataclass(frozen=True) @@ -310,6 +312,11 @@ def __init__( self._host: str = default_host() or "github.com" self._host_info: HostInfo | None = None self._auth_resolved: bool = False + # Per-host RefResolver cache, keyed by host override on PackageEntry. + # Pre-warmed on the main thread before workers spawn; lock guards + # against future refactors that allow worker-side cache misses. + self._host_resolvers: dict[str, RefResolver] = {} + self._host_resolvers_lock = threading.Lock() @classmethod def from_config( @@ -363,6 +370,75 @@ def _get_resolver(self) -> RefResolver: ) return self._resolver + def _effective_host(self, host: str | None) -> str | None: + """Normalize ``host`` for marketplace.json emission. 
+ + Returns ``None`` when ``host`` matches the active default host so + an explicit ``github.com/owner/repo`` source in apm.yml emits the + same shorthand (``source: github``, ``repo: owner/repo``) shape as + the bare ``owner/repo`` form. Non-default hosts pass through + unchanged and downstream mappers emit ``source: url`` / + ``source: git-subdir`` with the full HTTPS URL. + """ + if host is None or host == self._host: + return None + return host + + def _get_resolver_for_host(self, host: str | None) -> RefResolver: + """Return a RefResolver bound to *host* (default when ``None``). + + Non-default hosts go through ``AuthResolver.resolve(host)`` so that + ``GITHUB_APM_PAT``, ``GITHUB_APM_PAT_{ORG}``, ``GITHUB_TOKEN`` and + ``GH_TOKEN`` are consulted before falling back to ambient git + credentials (SSH key / credential helper). Per-host resolvers are + cached for the lifetime of the build so each unique host pays the + auth-resolution cost only once. + """ + if host is None or host == self._host: + return self._get_resolver() + with self._host_resolvers_lock: + cached = self._host_resolvers.get(host) + if cached is not None: + return cached + token = self._resolve_token_for_host(host) + logger.debug( + "Creating per-host RefResolver for %s (token=%s)", + host, + "set" if token else "unset", + ) + resolver = RefResolver( + timeout_seconds=self._options.timeout_seconds, + offline=self._options.offline, + host=host, + token=token, + ) + self._host_resolvers[host] = resolver + return resolver + + def _resolve_token_for_host(self, host: str) -> str | None: + """Resolve an auth token for a non-default *host* via ``AuthResolver``. + + Returns ``None`` -- letting ``git`` fall back to ambient credentials + -- when offline, when no token is configured for the host, or when + ``AuthResolver`` raises. Never raises. 
+ """ + if self._options.offline: + return None + try: + from ..core.auth import AuthResolver # lazy import + + resolver = self._auth_resolver + if resolver is None: + resolver = AuthResolver() + self._auth_resolver = resolver + ctx = resolver.resolve(host) # type: ignore[union-attr] + if ctx.token: + logger.debug("Resolved token for host %s (source=%s)", host, ctx.source) + return ctx.token + except Exception: + logger.debug("Could not resolve token for host %s", host, exc_info=True) + return None + def _ensure_auth(self) -> None: """Lazily resolve host classification and GitHub token. @@ -441,7 +517,7 @@ def _resolve_entry(self, entry: PackageEntry) -> ResolvedPackage: is_prerelease=False, ) yml = self._load_yml() - resolver = self._get_resolver() + resolver = self._get_resolver_for_host(entry.host) owner_repo = entry.source if entry.ref is not None: @@ -471,6 +547,7 @@ def _resolve_explicit_ref( requested_version=entry.version, tags=entry.tags, is_prerelease=sv.is_prerelease if sv else False, + host=self._effective_host(entry.host), ) refs = resolver.list_remote_refs(owner_repo) @@ -491,6 +568,7 @@ def _resolve_explicit_ref( requested_version=entry.version, tags=entry.tags, is_prerelease=sv.is_prerelease if sv else False, + host=self._effective_host(entry.host), ) # Try as full refname @@ -510,6 +588,7 @@ def _resolve_explicit_ref( requested_version=entry.version, tags=entry.tags, is_prerelease=sv.is_prerelease if sv else False, + host=self._effective_host(entry.host), ) # Try as branch name @@ -526,6 +605,7 @@ def _resolve_explicit_ref( requested_version=entry.version, tags=entry.tags, is_prerelease=False, + host=self._effective_host(entry.host), ) # HEAD special case @@ -584,6 +664,7 @@ def _resolve_version_range( requested_version=version_range, tags=entry.tags, is_prerelease=best_sv.is_prerelease, + host=self._effective_host(entry.host), ) # -- concurrent resolution ---------------------------------------------- @@ -613,6 +694,12 @@ def resolve(self) -> 
ResolveResult: # spawning workers -- avoids a race on _ensure_auth() and # matches the pattern used in _prefetch_metadata(). self._get_resolver() + # Pre-warm any per-host resolvers needed by entries that override the + # default host via the ``host.tld/owner/repo`` source form. Done on + # the main thread so workers never race to create the same resolver. + for entry in entries: + if entry.host: + self._get_resolver_for_host(entry.host) with ThreadPoolExecutor(max_workers=min(self._options.concurrency, len(entries))) as pool: future_to_index = { @@ -656,54 +743,71 @@ def _fetch_remote_metadata(self, pkg: ResolvedPackage) -> dict[str, str] | None: ``None`` on any error. This is purely cosmetic enrichment -- failures are silently logged at debug level and never propagate. - When a GitHub token is available (via ``self._github_token``), it - is included as an ``Authorization`` header so private repos can be - accessed. + When a token is available for the package's host, it is included + as an ``Authorization`` header so private repos can be accessed. + A token resolved for the builder's default host is never sent to + another host. - For non-github.com GitHub-family hosts (GHES, GHE Cloud), uses the - GitHub REST API instead of raw.githubusercontent.com (which is only - available for github.com). For non-GitHub hosts, metadata - enrichment is skipped. + Each package is fetched from its own host: ``github.com`` + packages use the fast ``raw.githubusercontent.com`` CDN; GHES + and GHE Cloud packages use the GitHub REST API on the package's + host. For non-GitHub-class hosts, metadata enrichment is + skipped. """ try: path_prefix = f"{pkg.subdir}/" if pkg.subdir else "" file_path = f"{path_prefix}apm.yml" - # Determine URL strategy based on host kind - host_kind = self._host_info.kind if self._host_info else "github" + # Resolve the effective host for this package and its + # classification. 
Falls back to the builder default when the + # package did not carry an explicit host override. + effective_host = pkg.host or self._host + if pkg.host is None or pkg.host == self._host: + host_info = self._host_info + token = self._github_token + else: + from ..core.auth import AuthResolver # lazy import + + try: + host_info = AuthResolver.classify_host(effective_host) + except Exception: + host_info = None + token = self._resolve_token_for_host(effective_host) + + host_kind = host_info.kind if host_info else "github" if host_kind not in ("github", "ghe_cloud", "ghes"): # Non-GitHub hosts -- skip metadata enrichment logger.debug( "Skipping metadata fetch for %s (non-GitHub host: %s)", pkg.name, - self._host, + effective_host, ) return None - if host_kind == "ghe_cloud" and not self._github_token: + if host_kind == "ghe_cloud" and not token: logger.debug( "Skipping metadata fetch for %s (GHE Cloud requires auth)", pkg.name, ) return None - if self._host == "github.com": + if effective_host == "github.com": # github.com -- use fast raw.githubusercontent.com CDN url = f"https://raw.githubusercontent.com/{pkg.source_repo}/{pkg.sha}/{file_path}" req = urllib.request.Request(url) # noqa: S310 - if self._github_token: - req.add_header("Authorization", f"token {self._github_token}") + if token: + req.add_header("Authorization", f"token {token}") else: - # GHES / GHE Cloud -- use REST API + # GHES / GHE Cloud -- use REST API on the package's host api_base = ( - self._host_info.api_base if self._host_info else None - ) or f"https://{self._host}/api/v3" + host_info.api_base if host_info else None + ) or f"https://{effective_host}/api/v3" url = f"{api_base}/repos/{pkg.source_repo}/contents/{file_path}?ref={pkg.sha}" req = urllib.request.Request(url) # noqa: S310 req.add_header("Accept", "application/vnd.github.raw") - if self._github_token: - req.add_header("Authorization", f"token {self._github_token}") + if token: + req.add_header("Authorization", f"token {token}") with 
urllib.request.urlopen(req, timeout=5) as resp: # noqa: S310 raw = resp.read().decode("utf-8") @@ -999,9 +1103,17 @@ def build(self) -> BuildReport: ), ) - # Cleanup resolver + # Cleanup default + per-host resolvers so long-lived builder + # instances do not leak caches or thread locks across builds. if self._resolver is not None: self._resolver.close() + with self._host_resolvers_lock: + for host_resolver in self._host_resolvers.values(): + try: + host_resolver.close() + except Exception: # pragma: no cover - close is best-effort + logger.debug("Failed to close per-host RefResolver", exc_info=True) + self._host_resolvers.clear() return BuildReport( outputs=report.outputs, diff --git a/src/apm_cli/marketplace/output_mappers.py b/src/apm_cli/marketplace/output_mappers.py index 049cbb844..fa7183b26 100644 --- a/src/apm_cli/marketplace/output_mappers.py +++ b/src/apm_cli/marketplace/output_mappers.py @@ -172,11 +172,22 @@ def compose( ) plugin["source"] = source_value else: + # Remote source: emit per the official Claude Code marketplace + # schema. When the package was authored with a host-prefixed + # source (``host.tld/owner/repo``), emit a real ``https://`` + # URL so Claude Code can clone from a non-default host (e.g. + # GHE) -- the ``github`` shorthand only resolves to github.com. 
source_obj: dict[str, Any] = OrderedDict() if pkg.subdir: source_obj["source"] = "git-subdir" - source_obj["url"] = pkg.source_repo + if pkg.host: + source_obj["url"] = f"https://{pkg.host}/{pkg.source_repo}" + else: + source_obj["url"] = pkg.source_repo source_obj["path"] = pkg.subdir + elif pkg.host: + source_obj["source"] = "url" + source_obj["url"] = f"https://{pkg.host}/{pkg.source_repo}" else: source_obj["source"] = "github" source_obj["repo"] = pkg.source_repo @@ -267,7 +278,10 @@ def _codex_source(entry: PackageEntry, pkg: ResolvedPackage) -> dict[str, Any]: if pkg.subdir: source_obj: dict[str, Any] = OrderedDict() source_obj["source"] = "git-subdir" - source_obj["url"] = pkg.source_repo + if pkg.host: + source_obj["url"] = f"https://{pkg.host}/{pkg.source_repo}" + else: + source_obj["url"] = pkg.source_repo source_obj["path"] = pkg.subdir if pkg.ref: source_obj["ref"] = pkg.ref @@ -277,7 +291,10 @@ def _codex_source(entry: PackageEntry, pkg: ResolvedPackage) -> dict[str, Any]: source_obj = OrderedDict() source_obj["source"] = "url" - source_obj["url"] = pkg.source_repo + if pkg.host: + source_obj["url"] = f"https://{pkg.host}/{pkg.source_repo}" + else: + source_obj["url"] = pkg.source_repo if pkg.ref: source_obj["ref"] = pkg.ref if pkg.sha: diff --git a/src/apm_cli/marketplace/publisher.py b/src/apm_cli/marketplace/publisher.py index d4de9cb41..3fa93ee42 100644 --- a/src/apm_cli/marketplace/publisher.py +++ b/src/apm_cli/marketplace/publisher.py @@ -49,11 +49,12 @@ from ._io import atomic_write from .errors import MarketplaceError, MarketplaceYmlError # noqa: F401 from .git_stderr import translate_git_stderr +from .migration import load_marketplace_config from .ref_resolver import RefResolver from .resolver import parse_marketplace_ref from .semver import parse_semver from .tag_pattern import render_tag -from .yml_schema import load_marketplace_yml +from .yml_schema import load_marketplace_yml # noqa: F401 (kept for back-compat) logger = 
logging.getLogger(__name__) @@ -287,7 +288,8 @@ class MarketplacePublisher: Parameters ---------- marketplace_root: - Path to the marketplace repository root (must contain + Path to the marketplace repository root (must contain an + ``apm.yml`` with a ``marketplace`` block, or the legacy ``marketplace.yml``). ref_resolver: Optional ``RefResolver`` instance (reserved for future use). @@ -314,10 +316,9 @@ def __init__( self._yml = None def _load_yml(self): - """Lazy-load marketplace.yml.""" + """Lazy-load marketplace config (apm.yml or legacy marketplace.yml).""" if self._yml is None: - yml_path = self._root / "marketplace.yml" - self._yml = load_marketplace_yml(yml_path) + self._yml = load_marketplace_config(self._root) return self._yml # -- plan --------------------------------------------------------------- @@ -332,9 +333,10 @@ def plan( ) -> PublishPlan: """Compute a publish plan. - Reads the local ``marketplace.yml`` to discover the marketplace - name and version, validates all targets, and computes a - deterministic branch name and commit message. + Reads the local marketplace config (``apm.yml`` or legacy + ``marketplace.yml``) to discover the marketplace name and version, + validates all targets, and computes a deterministic branch name + and commit message. Parameters ---------- @@ -356,7 +358,8 @@ def plan( Raises ------ MarketplaceYmlError - If ``marketplace.yml`` cannot be loaded or is invalid. + If the marketplace config (``apm.yml`` or legacy + ``marketplace.yml``) cannot be loaded or is invalid. PathTraversalError If any target's ``path_in_repo`` is a path traversal. 
""" diff --git a/src/apm_cli/marketplace/yml_editor.py b/src/apm_cli/marketplace/yml_editor.py index d1dca6b9c..b892e7189 100644 --- a/src/apm_cli/marketplace/yml_editor.py +++ b/src/apm_cli/marketplace/yml_editor.py @@ -132,10 +132,17 @@ def _find_entry_index(packages, name: str) -> int: def _validate_source(source: str) -> None: - """Validate that *source* has ``owner/repo`` shape or ``./...`` local path.""" + """Validate that *source* has one of the accepted shapes. + + Accepts ``owner/repo``, ``host.tld/owner/repo``, ``https://host.tld/ + owner/repo[.git]`` (remote forms), or ``./`` (local). + """ if not SOURCE_RE.match(source): raise MarketplaceYmlError( - f"'source' must match '/' or './' shape, got '{source}'" + f"'source' must be one of " + f"'/', '//', " + f"'https:////[.git]', or './', " + f"got '{source}'" ) try: validate_path_segments(source, context="source", allow_current_dir=True) diff --git a/src/apm_cli/marketplace/yml_schema.py b/src/apm_cli/marketplace/yml_schema.py index 0dee55164..18beac11f 100644 --- a/src/apm_cli/marketplace/yml_schema.py +++ b/src/apm_cli/marketplace/yml_schema.py @@ -69,11 +69,60 @@ r"(?:\+[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*)?$" ) -# Source field accepts either ``owner/repo`` (remote) or ``./...`` (local -# path within the same repo). Used by both yml_schema and yml_editor for -# source field validation. -SOURCE_RE = re.compile(r"^(?:[^/]+/[^/]+|\./.*)$") +# Source field accepts: +# - ``owner/repo`` (remote, default host) +# - ``host.tld/owner/repo`` (remote on a non-default host, shorthand) +# - ``https://host.tld/owner/repo`` (remote on a non-default host, full URL) +# - ``https://host.tld/owner/repo.git`` (same, with optional ``.git`` suffix) +# - ``./...`` (local path within the same repo) +# +# Used by both yml_schema and yml_editor for source field validation. +# +# The host segment is restricted to RFC-1123 hostname characters +# (letters, digits, hyphens, dots) and must contain at least one dot +# (i.e. 
look like a FQDN, to disambiguate from ``owner/repo``). Userinfo +# (``user@host``), port (``host:port``), query strings, fragments, SSH SCP +# (``git@host:path``) and non-``https`` URL schemes are explicitly rejected +# to avoid RFC 3986 confused-deputy attacks. +_HOST_PAT = r"(?:[A-Za-z0-9](?:[A-Za-z0-9-]*[A-Za-z0-9])?\.)+[A-Za-z][A-Za-z0-9-]*" +_OWNER_REPO_PAT = r"[A-Za-z0-9._-]+/[A-Za-z0-9._-]+" + +SOURCE_RE = re.compile( + r"^(?:" + rf"https://{_HOST_PAT}/{_OWNER_REPO_PAT}(?:\.git)?" + rf"|{_HOST_PAT}/{_OWNER_REPO_PAT}" + rf"|{_OWNER_REPO_PAT}" + r"|\./.*" + r")$" +) LOCAL_SOURCE_RE = re.compile(r"^\./") +# Matches ``host.tld/owner/repo`` (3 segments, first is FQDN-ish). +_HOST_PREFIXED_SOURCE_RE = re.compile(rf"^({_HOST_PAT})/({_OWNER_REPO_PAT})$") +# Matches ``https://host.tld/owner/repo[.git]`` and captures host + owner/repo. +_HTTPS_URL_SOURCE_RE = re.compile(rf"^https://({_HOST_PAT})/({_OWNER_REPO_PAT})(?:\.git)?$") + + +def split_host_from_source(source: str) -> tuple[str | None, str]: + """Split a host-qualified source into ``(host, owner/repo)``. + + Accepts both shorthand (``host.tld/owner/repo``) and full HTTPS URL + (``https://host.tld/owner/repo[.git]``) forms. Returns ``(None, source)`` + for the plain ``owner/repo`` shorthand or local ``./...`` paths. + + A trailing ``.git`` suffix on the repo segment is stripped so the + returned ``owner/repo`` is normalized regardless of input form. + """ + m = _HTTPS_URL_SOURCE_RE.match(source) + if m: + host, owner_repo = m.group(1), m.group(2) + if owner_repo.endswith(".git"): + owner_repo = owner_repo[: -len(".git")] + return host, owner_repo + m = _HOST_PREFIXED_SOURCE_RE.match(source) + if m: + return m.group(1), m.group(2) + return None, source + # Placeholder tokens accepted in ``tag_pattern`` / ``build.tagPattern``. 
_TAG_PLACEHOLDERS = ("{version}", "{name}") @@ -285,6 +334,10 @@ class PackageEntry: category: str | None = None # Derived (set by loader, not by user) is_local: bool = False + # Optional non-default git host parsed from ``source`` of the form + # ``host.tld/owner/repo``. ``None`` means use the default host + # (``GITHUB_HOST`` env or ``github.com``). + host: str | None = None @dataclass(frozen=True) @@ -383,12 +436,16 @@ def _validate_semver(version: str, *, context: str = "version") -> None: def _validate_source(source: str, *, index: int) -> None: """Validate ``source`` field shape and path safety. - Accepts either ``owner/repo`` (remote) or ``./...`` (local path). + Accepts ``owner/repo``, ``host.tld/owner/repo``, ``https://host.tld/ + owner/repo[.git]``, or ``./``. """ ctx = f"packages[{index}].source" if not SOURCE_RE.match(source): raise MarketplaceYmlError( - f"'{ctx}' must match '/' or './' shape, got '{source}'" + f"'{ctx}' must be one of " + f"'/', '//', " + f"'https:////[.git]', or './', " + f"got '{source}'" ) is_local = bool(LOCAL_SOURCE_RE.match(source)) try: @@ -655,6 +712,11 @@ def _parse_package_entry(raw: Any, index: int) -> PackageEntry: source = _require_str(raw, "source", context=f"packages[{index}]") _validate_source(source, index=index) is_local = bool(LOCAL_SOURCE_RE.match(source)) + # Detect host-prefixed source (e.g. ``host.tld/owner/repo``) and split + # the host off so downstream consumers continue to see ``owner/repo``. 
+ host: str | None = None + if not is_local: + host, source = split_host_from_source(source) # APM-only: subdir (irrelevant for local packages but harmless) subdir: str | None = raw.get("subdir") @@ -807,6 +869,7 @@ def _parse_package_entry(raw: Any, index: int) -> PackageEntry: repository=repository, category=category, is_local=is_local, + host=host, ) diff --git a/tests/unit/marketplace/test_builder.py b/tests/unit/marketplace/test_builder.py index f00c170f9..25d48995c 100644 --- a/tests/unit/marketplace/test_builder.py +++ b/tests/unit/marketplace/test_builder.py @@ -133,6 +133,28 @@ def _build_with_mock( return builder.build() +def _build_with_host_mock( + tmp_path: Path, + yml_content: str, + refs_by_remote: dict[str, list[RemoteRef]], + options: BuildOptions | None = None, +) -> BuildReport: + """Build with a mock resolver that handles default *and* host-prefixed entries. + + The standard ``_build_with_mock`` only patches ``_resolver`` (the default + resolver). Host-prefixed entries trigger ``_get_resolver_for_host`` which + constructs a real ``RefResolver`` bound to the override host. For unit + tests we want every host to resolve through the same in-memory mock. 
+ """ + yml_path = _write_yml(tmp_path, yml_content) + opts = options or BuildOptions(offline=True) + builder = MarketplaceBuilder(yml_path, opts) + mock = _MockRefResolver(refs_by_remote) + builder._resolver = mock # type: ignore[assignment] + builder._get_resolver_for_host = lambda _host: mock # type: ignore[assignment] + return builder.build() + + # --------------------------------------------------------------------------- # parse_semver # --------------------------------------------------------------------------- @@ -910,6 +932,257 @@ def test_no_subdir_no_path(self, tmp_path: Path) -> None: cr = data["plugins"][0] assert "path" not in cr["source"] + # -- default-host (``owner/repo``) ------------------------------------ + + def test_default_host_emits_github_shorthand(self, tmp_path: Path) -> None: + """A plain ``owner/repo`` source emits the ``github`` shorthand form.""" + refs = {"acme/code-reviewer": _make_refs("v2.0.0")} + yml = """\ +name: acme-tools +description: Test +version: 1.0.0 +owner: + name: Acme + email: t@acme.example.com + url: https://acme.example.com +packages: + - name: code-reviewer + source: acme/code-reviewer + version: "^2.0.0" +""" + report = _build_with_mock(tmp_path, yml, refs) + data = json.loads(report.output_path.read_text("utf-8")) + src = data["plugins"][0]["source"] + assert src["source"] == "github" + assert src["repo"] == "acme/code-reviewer" + assert "url" not in src + # The github shorthand never carries a ``path`` key. 
+ assert "path" not in src + + # -- host-prefixed sources -------------------------------------------- + + def test_host_prefixed_without_subdir_emits_url_source(self, tmp_path: Path) -> None: + """``host.tld/owner/repo`` (no subdir) emits a full URL via ``source: url``.""" + refs = {"acme/agents": _make_refs("v0.3.0")} + yml = """\ +name: ghe-tools +description: Test +version: 1.0.0 +owner: + name: Acme + email: t@acme.example.com + url: https://acme.example.com +packages: + - name: baseline-rules + source: ghe.example.com/acme/agents + ref: v0.3.0 +""" + report = _build_with_host_mock(tmp_path, yml, refs) + data = json.loads(report.output_path.read_text("utf-8")) + src = data["plugins"][0]["source"] + assert src["source"] == "url" + assert src["url"] == "https://ghe.example.com/acme/agents" + assert "repo" not in src + assert "path" not in src + + def test_host_prefixed_with_subdir_emits_git_subdir_url(self, tmp_path: Path) -> None: + """``host.tld/owner/repo`` + subdir emits ``git-subdir`` with a full https URL.""" + refs = {"acme/agents": _make_refs("v0.3.0")} + yml = """\ +name: ghe-tools +description: Test +version: 1.0.0 +owner: + name: Acme + email: t@acme.example.com + url: https://acme.example.com +packages: + - name: baseline-rules + source: ghe.example.com/acme/agents + subdir: packages/baseline-rules + ref: v0.3.0 +""" + report = _build_with_host_mock(tmp_path, yml, refs) + data = json.loads(report.output_path.read_text("utf-8")) + src = data["plugins"][0]["source"] + assert src["source"] == "git-subdir" + assert src["url"] == "https://ghe.example.com/acme/agents" + assert src["path"] == "packages/baseline-rules" + + def test_default_host_with_subdir_emits_shorthand_url(self, tmp_path: Path) -> None: + """``owner/repo`` + subdir keeps the historical ``url: owner/repo`` shape.""" + refs = {"acme/test-generator": _make_refs("v1.0.0")} + yml = """\ +name: acme-tools +description: Test +version: 1.0.0 +owner: + name: Acme + email: t@acme.example.com + url: 
https://acme.example.com +packages: + - name: test-generator + source: acme/test-generator + version: "~1.0.0" + subdir: src/plugin +""" + report = _build_with_mock(tmp_path, yml, refs) + data = json.loads(report.output_path.read_text("utf-8")) + src = data["plugins"][0]["source"] + assert src["source"] == "git-subdir" + # Default host: keep the shorthand "owner/repo" (no scheme), preserving + # backwards compatibility with marketplaces emitted before host-prefix + # support landed. + assert src["url"] == "acme/test-generator" + assert src["path"] == "src/plugin" + + def test_explicit_default_host_prefix_emits_shorthand(self, tmp_path: Path) -> None: + """``github.com/owner/repo`` (explicit default host) emits ``source: github``. + + Without this normalization, consumers (Claude Code, CI) that + pattern-match on ``source == "github"`` would silently skip + packages whose authors happened to write the host out in full, + even though the form is documented as equivalent. + """ + refs = {"acme/explicit": _make_refs("v1.0.0")} + yml = """\ +name: explicit-host +description: Test +version: 1.0.0 +owner: + name: Acme + email: t@acme.example.com + url: https://acme.example.com +packages: + - name: explicit + source: github.com/acme/explicit + ref: v1.0.0 +""" + report = _build_with_mock(tmp_path, yml, refs) + data = json.loads(report.output_path.read_text("utf-8")) + src = data["plugins"][0]["source"] + assert src["source"] == "github" + assert src["repo"] == "acme/explicit" + assert "url" not in src + + def test_per_host_resolvers_use_host_specific_code_path(self, tmp_path: Path) -> None: + """Host-prefixed entries flow through ``_get_resolver_for_host``. + + Verifies the integration glue without exercising the network: the + builder must reach the per-host code path for non-default-host + entries and the build must succeed when that path returns a working + resolver. 
Real per-host caching and token isolation are verified + in :class:`TestGetResolverForHostTokenIsolation` below. + """ + refs = { + "acme/code-reviewer": _make_refs("v2.0.0"), + "team/repo": _make_refs("v1.0.0"), + } + yml = """\ +name: mixed-tools +description: Test +version: 1.0.0 +owner: + name: Mixed + email: t@mixed.example.com + url: https://mixed.example.com +packages: + - name: code-reviewer + source: acme/code-reviewer + version: "^2.0.0" + - name: gitlab-tool + source: gitlab.example.org/team/repo + ref: v1.0.0 +""" + yml_path = _write_yml(tmp_path, yml) + builder = MarketplaceBuilder(yml_path, BuildOptions(offline=True)) + mock = _MockRefResolver(refs) + builder._resolver = mock # type: ignore[assignment] + called_hosts: list[str] = [] + + def _spy(host: str) -> _MockRefResolver: + called_hosts.append(host) + return mock + + builder._get_resolver_for_host = _spy # type: ignore[assignment] + builder.build() + assert "gitlab.example.org" in called_hosts + + +class TestGetResolverForHostTokenIsolation: + """Real (non-mocked) ``_get_resolver_for_host`` token-isolation guarantees. + + Exercises the actual implementation -- including the AuthResolver wiring + -- so the secure-by-default property cannot regress silently. 
+ """ + + def test_default_host_returns_shared_default_resolver(self, tmp_path: Path) -> None: + """``host=None`` and ``host==self._host`` both return the default resolver.""" + yml_path = _write_yml(tmp_path, _BASIC_YML) + builder = MarketplaceBuilder(yml_path, BuildOptions(offline=True)) + default = builder._get_resolver_for_host(None) + assert builder._get_resolver_for_host(builder._host) is default + + def test_non_default_host_never_forwards_default_token(self, tmp_path: Path) -> None: + """A token resolved for the default host is never sent to a foreign host.""" + yml_path = _write_yml(tmp_path, _BASIC_YML) + builder = MarketplaceBuilder(yml_path, BuildOptions(offline=True)) + builder._github_token = "ghp_default_host_secret" + builder._auth_resolved = True + resolver = builder._get_resolver_for_host("ghe.example.com") + assert resolver._host == "ghe.example.com" + # Offline mode short-circuits AuthResolver and yields no token. + assert resolver._token is None + assert resolver._token != builder._github_token + + def test_non_default_host_consults_auth_resolver(self, tmp_path: Path) -> None: + """Non-default hosts call ``AuthResolver.resolve(host)`` for their own token.""" + from unittest.mock import MagicMock + + from apm_cli.core.auth import AuthContext, HostInfo + + yml_path = _write_yml(tmp_path, _BASIC_YML) + # offline=False so the AuthResolver branch is exercised. 
+ builder = MarketplaceBuilder(yml_path, BuildOptions(offline=False)) + fake_ctx = AuthContext( + token="ghs_ghe_specific_token", + source="GITHUB_APM_PAT_GHE", + token_type="classic", + host_info=HostInfo( + host="ghe.example.com", + kind="ghes", + has_public_repos=False, + api_base="https://ghe.example.com/api/v3", + ), + git_env={}, + auth_scheme="basic", + ) + fake_auth = MagicMock() + fake_auth.resolve.return_value = fake_ctx + builder._auth_resolver = fake_auth + resolver = builder._get_resolver_for_host("ghe.example.com") + assert resolver._host == "ghe.example.com" + assert resolver._token == "ghs_ghe_specific_token" + fake_auth.resolve.assert_called_once_with("ghe.example.com") + + def test_per_host_resolver_is_cached(self, tmp_path: Path) -> None: + """Repeated lookups for the same host return the same instance.""" + yml_path = _write_yml(tmp_path, _BASIC_YML) + builder = MarketplaceBuilder(yml_path, BuildOptions(offline=True)) + first = builder._get_resolver_for_host("ghe.example.com") + second = builder._get_resolver_for_host("ghe.example.com") + assert first is second + + def test_distinct_hosts_get_distinct_resolvers(self, tmp_path: Path) -> None: + """Different non-default hosts get isolated resolver instances.""" + yml_path = _write_yml(tmp_path, _BASIC_YML) + builder = MarketplaceBuilder(yml_path, BuildOptions(offline=True)) + ghe = builder._get_resolver_for_host("ghe.example.com") + gitlab = builder._get_resolver_for_host("gitlab.example.org") + assert ghe is not gitlab + assert ghe._host == "ghe.example.com" + assert gitlab._host == "gitlab.example.org" + # --------------------------------------------------------------------------- # Deterministic output (round-trip) @@ -1902,6 +2175,114 @@ def test_metadata_fetch_ghe_cloud_no_token_skipped(self, tmp_path: Path) -> None assert result is None mock_open.assert_not_called() + def test_metadata_fetch_uses_package_host_not_builder_host(self, tmp_path: Path) -> None: + """``pkg.host`` overrides 
``self._host`` for URL + token + classification. + + Regression guard: previously ``_fetch_remote_metadata`` branched + only on ``self._host``, so a GHE-hosted package would be fetched + from ``raw.githubusercontent.com`` -- potentially returning an + unrelated github.com repo's metadata. After the fix, the + package's own host drives every decision. + """ + from unittest.mock import MagicMock + + from apm_cli.core.auth import AuthContext, HostInfo + + pkg = self._make_pkg(source_repo="platform/agents", subdir=None) + pkg = ResolvedPackage( + name=pkg.name, + source_repo=pkg.source_repo, + subdir=pkg.subdir, + ref=pkg.ref, + sha=pkg.sha, + requested_version=pkg.requested_version, + tags=pkg.tags, + is_prerelease=pkg.is_prerelease, + host="ghe.corp.example.com", + ) + builder = self._make_builder(tmp_path) + # Builder default is github.com with a github.com-scoped token. + builder._host = "github.com" + builder._github_token = "ghp_default_host_secret" + builder._host_info = SimpleNamespace(kind="github", api_base="https://api.github.com") + # AuthResolver returns a GHE-specific token for the package's host. + fake_auth = MagicMock() + fake_auth.resolve.return_value = AuthContext( + token="ghs_ghe_specific_token", + source="GITHUB_APM_PAT_GHE", + token_type="classic", + host_info=HostInfo( + host="ghe.corp.example.com", + kind="ghes", + has_public_repos=False, + api_base="https://ghe.corp.example.com/api/v3", + ), + git_env={}, + auth_scheme="basic", + ) + builder._auth_resolver = fake_auth + yaml_body = b"description: GHE tool\nversion: 0.3.1\n" + mock_resp = _FakeHTTPResponse(yaml_body) + # Force the package host to classify as GHES so the GitHub-family + # REST path is exercised (an arbitrary FQDN classifies as + # ``generic`` and is correctly skipped). 
+ with ( + patch( + "apm_cli.core.auth.AuthResolver.classify_host", + return_value=HostInfo( + host="ghe.corp.example.com", + kind="ghes", + has_public_repos=False, + api_base="https://ghe.corp.example.com/api/v3", + ), + ), + patch( + "apm_cli.marketplace.builder.urllib.request.urlopen", + return_value=mock_resp, + ) as mock_open, + ): + result = builder._fetch_remote_metadata(pkg) + assert result == {"description": "GHE tool", "version": "0.3.1"} + req = mock_open.call_args[0][0] + parsed = urllib.parse.urlparse(req.full_url) + # URL hits the package's host, not the builder default. + assert parsed.hostname == "ghe.corp.example.com" + assert "raw.githubusercontent.com" not in req.full_url + # Token sent is the GHE-specific one, never the default-host token. + assert req.get_header("Authorization") == "token ghs_ghe_specific_token" + + def test_metadata_fetch_skipped_for_non_github_package_host(self, tmp_path: Path) -> None: + """Non-GitHub-class ``pkg.host`` (e.g. GitLab) skips fetch entirely.""" + from unittest.mock import patch as _patch + + pkg = ResolvedPackage( + name="gitlab-tool", + source_repo="team/repo", + subdir=None, + ref="v1.0.0", + sha=_SHA_A, + requested_version="^1.0.0", + tags=(), + is_prerelease=False, + host="gitlab.example.org", + ) + builder = self._make_builder(tmp_path) + builder._host = "github.com" + builder._host_info = SimpleNamespace(kind="github", api_base="https://api.github.com") + with ( + _patch( + "apm_cli.core.auth.AuthResolver.classify_host", + return_value=SimpleNamespace(kind="gitlab", api_base=None), + ), + _patch.object(builder, "_resolve_token_for_host", return_value=None), + patch( + "apm_cli.marketplace.builder.urllib.request.urlopen", + ) as mock_open, + ): + result = builder._fetch_remote_metadata(pkg) + assert result is None + mock_open.assert_not_called() + # --------------------------------------------------------------------------- # _ensure_auth lazy resolution diff --git 
a/tests/unit/marketplace/test_yml_editor.py b/tests/unit/marketplace/test_yml_editor.py index b20aabced..5e208a7dc 100644 --- a/tests/unit/marketplace/test_yml_editor.py +++ b/tests/unit/marketplace/test_yml_editor.py @@ -156,6 +156,28 @@ def test_invalid_source_no_slash_raises(self, tmp_path): with pytest.raises(MarketplaceYmlError, match="source"): add_plugin_entry(yml, source="noslash", version=">=1.0.0") + def test_invalid_source_error_message_lists_all_forms(self, tmp_path): + """Error wording must mention every accepted form so users get a complete hint.""" + yml = _write_yml(tmp_path, _BASIC_YML) + with pytest.raises(MarketplaceYmlError) as exc: + add_plugin_entry(yml, source="bogus@host/owner/repo", version=">=1.0.0") + msg = str(exc.value) + assert "/" in msg + assert "//" in msg + assert "https://" in msg + assert "./" in msg + + def test_host_prefixed_source_accepted(self, tmp_path): + """yml_editor accepts the same host-prefixed form as the loader.""" + yml = _write_yml(tmp_path, _BASIC_YML) + add_plugin_entry( + yml, + name="ghe-tool", + source="ghe.example.com/acme/tool", + ref="v1.0.0", + ) + assert "ghe.example.com/acme/tool" in yml.read_text("utf-8") + def test_path_traversal_in_subdir_raises(self, tmp_path): yml = _write_yml(tmp_path, _BASIC_YML) with pytest.raises(MarketplaceYmlError): diff --git a/tests/unit/marketplace/test_yml_schema.py b/tests/unit/marketplace/test_yml_schema.py index cb24897fc..5f889de84 100644 --- a/tests/unit/marketplace/test_yml_schema.py +++ b/tests/unit/marketplace/test_yml_schema.py @@ -654,6 +654,190 @@ def test_build_default_tag_pattern(self, tmp_path: Path): assert result.build.tag_pattern == "v{version}" +# --------------------------------------------------------------------------- +# Host-prefixed source form (``host.tld/owner/repo``) +# --------------------------------------------------------------------------- + + +class TestHostPrefixedSource: + """``source: host.tld/owner/repo`` splits the host off into 
``PackageEntry.host``.""" + + def test_default_owner_repo_has_no_host(self, tmp_path: Path): + """Plain ``owner/repo`` source leaves ``host`` as None (use default).""" + content = _minimal_yml( + packages=("packages:\n - name: tool-a\n source: acme/tool-a\n ref: v1.0.0") + ) + yml = _write_yml(tmp_path, content) + result = load_marketplace_yml(yml) + entry = result.packages[0] + assert entry.source == "acme/tool-a" + assert entry.host is None + assert entry.is_local is False + + def test_local_source_has_no_host(self, tmp_path: Path): + """``./path`` local sources never get a host.""" + content = _minimal_yml(packages=("packages:\n - name: tool-a\n source: ./acme")) + yml = _write_yml(tmp_path, content) + result = load_marketplace_yml(yml) + entry = result.packages[0] + assert entry.is_local is True + assert entry.host is None + + def test_ghe_host_prefixed_source_split(self, tmp_path: Path): + """``host.tld/owner/repo`` splits host out and leaves ``owner/repo``.""" + content = _minimal_yml( + packages=( + "packages:\n" + " - name: tool-a\n" + " source: ghe.example.com/acme/agents\n" + " ref: v0.3.0" + ) + ) + yml = _write_yml(tmp_path, content) + result = load_marketplace_yml(yml) + entry = result.packages[0] + assert entry.source == "acme/agents" + assert entry.host == "ghe.example.com" + + def test_github_com_host_prefix_accepted(self, tmp_path: Path): + """The default host can also be expressed explicitly as a host prefix.""" + content = _minimal_yml( + packages=( + "packages:\n - name: tool-a\n source: github.com/acme/tool-a\n ref: v1.0.0" + ) + ) + yml = _write_yml(tmp_path, content) + result = load_marketplace_yml(yml) + entry = result.packages[0] + assert entry.source == "acme/tool-a" + assert entry.host == "github.com" + + def test_self_hosted_gitlab_host_accepted(self, tmp_path: Path): + """Any FQDN-shaped first segment is accepted (e.g. 
self-hosted GitLab).""" + content = _minimal_yml( + packages=( + "packages:\n" + " - name: tool-a\n" + " source: gitlab.example.org/team/repo\n" + " ref: main" + ) + ) + yml = _write_yml(tmp_path, content) + result = load_marketplace_yml(yml) + entry = result.packages[0] + assert entry.source == "team/repo" + assert entry.host == "gitlab.example.org" + + def test_four_segment_path_rejected(self, tmp_path: Path): + """Source with four slash-separated segments is not a valid form.""" + content = _minimal_yml( + packages=( + "packages:\n" + " - name: tool-a\n" + " source: host.tld/extra/owner/repo\n" + " ref: v1.0.0" + ) + ) + yml = _write_yml(tmp_path, content) + with pytest.raises(MarketplaceYmlError, match="source"): + load_marketplace_yml(yml) + + def test_three_segment_without_dot_rejected(self, tmp_path: Path): + """First segment must look like a hostname (contain a dot).""" + content = _minimal_yml( + packages=( + "packages:\n - name: tool-a\n source: not-a-host/owner/repo\n ref: v1.0.0" + ) + ) + yml = _write_yml(tmp_path, content) + with pytest.raises(MarketplaceYmlError, match="source"): + load_marketplace_yml(yml) + + def test_subdir_preserved_with_host_prefix(self, tmp_path: Path): + """``subdir`` is independent of the host-prefix split.""" + content = _minimal_yml( + packages=( + "packages:\n" + " - name: baseline-rules\n" + " source: ghe.example.com/acme/agents\n" + " subdir: packages/baseline-rules\n" + " ref: v0.3.0" + ) + ) + yml = _write_yml(tmp_path, content) + result = load_marketplace_yml(yml) + entry = result.packages[0] + assert entry.host == "ghe.example.com" + assert entry.source == "acme/agents" + assert entry.subdir == "packages/baseline-rules" + + # -- HTTPS URL form ---------------------------------------------------- + + def test_https_url_source_accepted(self, tmp_path: Path): + """``https://host.tld/owner/repo`` URL form splits into host + owner/repo.""" + content = _minimal_yml( + packages=( + "packages:\n" + " - name: tool-a\n" + " 
source: https://ghe.example.com/acme/agents\n" + " ref: v0.3.0" + ) + ) + yml = _write_yml(tmp_path, content) + entry = load_marketplace_yml(yml).packages[0] + assert entry.host == "ghe.example.com" + assert entry.source == "acme/agents" + + def test_https_url_source_with_dot_git_suffix_normalized(self, tmp_path: Path): + """``.git`` suffix is stripped from URL-form sources.""" + content = _minimal_yml( + packages=( + "packages:\n" + " - name: tool-a\n" + " source: https://ghe.example.com/acme/agents.git\n" + " ref: v0.3.0" + ) + ) + yml = _write_yml(tmp_path, content) + entry = load_marketplace_yml(yml).packages[0] + assert entry.host == "ghe.example.com" + assert entry.source == "acme/agents" + + # -- Security-relevant rejection cases --------------------------------- + + @pytest.mark.parametrize( + "bad_source", + [ + # Userinfo injection: ``user@host`` masquerades as host segment. + "evil.com@github.com/owner/repo", + "https://evil.com@github.com/owner/repo", + # Port in host segment. + "github.com:8443/owner/repo", + "https://github.com:8443/owner/repo", + # SSH SCP form (``git@host:path``) -- not supported, must not + # slip through as a 2-segment owner/repo. + "git@github.com:acme/agents.git", + # Other URL schemes. + "http://github.com/owner/repo", + "ssh://git@github.com/owner/repo", + "git://github.com/owner/repo", + # Query / fragment in source. + "github.com/owner/repo?ref=main", + "github.com/owner/repo#frag", + # Whitespace. 
+ "github.com /owner/repo", + ], + ) + def test_unsafe_source_forms_rejected(self, tmp_path: Path, bad_source: str): + """Userinfo / port / SSH / non-https / control-char sources are rejected.""" + content = _minimal_yml( + packages=(f'packages:\n - name: tool-a\n source: "{bad_source}"\n ref: main') + ) + yml = _write_yml(tmp_path, content) + with pytest.raises(MarketplaceYmlError, match="source"): + load_marketplace_yml(yml) + + # --------------------------------------------------------------------------- # S1: Output path traversal guard # ---------------------------------------------------------------------------