diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 00000000..b116904a --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,77 @@ +name: CodeQL + +# CodeQL static analysis for Go. Runs on every PR, every push to main, +# and weekly to catch new query packs landing upstream. Findings appear +# in the Security tab; default-severity ERROR also fails the PR check. +# +# Why this AND Semgrep? Different rule packs catch different classes of +# bugs — Semgrep is great at custom org-policy patterns, CodeQL is better +# at dataflow-heavy queries (taint sinks, SSRF, SQLi-style). They overlap +# but the union finds more. + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + # Sunday 04:23 UTC — off-peak, off-Monday-morning. + - cron: '23 4 * * 0' + workflow_dispatch: + +# Cancel in-progress runs for the same PR when a new commit lands. +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + analyze: + name: Analyze Go + runs-on: ubuntu-24.04 + timeout-minutes: 30 + permissions: + contents: read + security-events: write # upload SARIF to the Security tab + actions: read # CodeQL templates fetch workflow metadata + + strategy: + fail-fast: false + matrix: + language: [go] + + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + # Pin Go to the version declared in go.mod. Without this, CodeQL + # Autobuild uses the runner's bundled Go (older than our 1.25.x + # directive), which fails the compile and surfaces as a CI red + # rather than a real finding (gemini round-1 P2). 
+ - name: Set up Go + uses: actions/setup-go@4a3601121dd01d1626a1e23e37211e3254c1c06c # v6 + with: + go-version-file: go.mod + cache: false + + - name: Initialize CodeQL + uses: github/codeql-action/init@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 + with: + languages: ${{ matrix.language }} + # security-extended adds queries beyond the default "security + # and quality" pack — taint tracking, less-common sink types, + # crypto misuses. Worth the extra ~2 min run-time for an OSS + # supply-chain tool. + queries: security-extended + + - name: Autobuild + uses: github/codeql-action/autobuild@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 + + - name: Perform analysis + uses: github/codeql-action/analyze@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 + with: + category: /language:${{ matrix.language }} diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml new file mode 100644 index 00000000..72db960c --- /dev/null +++ b/.github/workflows/scorecard.yml @@ -0,0 +1,68 @@ +name: OpenSSF Scorecard + +# OpenSSF Scorecard runs a battery of OSS-project security checks +# (Branch-Protection, Token-Permissions, Pinned-Dependencies, +# Dangerous-Workflow, etc.) and reports a score 0-10. We surface +# findings via SARIF in the Security tab AND publish to the public +# Scorecard API (api.securityscorecards.dev) so consumers can audit +# our posture without us asking. +# +# We do NOT use this for PR gating — checks are advisory and the score +# moves on factors outside any one PR's diff. + +on: + branch_protection_rule: + schedule: + # Wednesday 09:17 UTC — mid-week so any regression caught by a + # branch-protection change has time to be reverted before weekend. + - cron: '17 9 * * 3' + push: + branches: [main] + workflow_dispatch: + +# At-most-one scheduled run. 
+concurrency: + group: scorecard-${{ github.ref }} + cancel-in-progress: false + +permissions: + contents: read + +jobs: + analysis: + name: Scorecard analysis + runs-on: ubuntu-24.04 + permissions: + contents: read + # Required to upload SARIF to the Security tab. + security-events: write + # Required to publish results to securityscorecards.dev. + id-token: write + + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run analysis + uses: ossf/scorecard-action@4eaacf0543bb3f2c246792bd56e8cdeffafb205a # v2.4.3 + with: + results_file: results.sarif + results_format: sarif + # Publish to api.securityscorecards.dev so the badge + history + # are public. Required for the Scorecard badge to work. + publish_results: true + + - name: Upload SARIF + uses: github/codeql-action/upload-sarif@68bde559dea0fdcac2102bfdf6230c5f70eb485e # v4 + with: + sarif_file: results.sarif + category: scorecard + + - name: Upload artifact (debugging) + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: scorecard-results + path: results.sarif + retention-days: 30 diff --git a/CODEOWNERS b/CODEOWNERS new file mode 100644 index 00000000..6de21dfe --- /dev/null +++ b/CODEOWNERS @@ -0,0 +1,82 @@ +# https://docs.github.com/en/repositories/managing-your-repositories-settings-and-customization/customizing-your-repository/about-code-owners +# +# CODEOWNERS for simple-container-com/api. +# +# IMPORTANT — semantics: when multiple users are listed on the same +# pattern, GitHub treats them as ALTERNATIVES, not a conjunction. A PR +# satisfies the CODEOWNERS gate once ANY one listed owner approves. +# To genuinely require approvals from BOTH leads on sensitive paths, +# configure a branch protection rule / Ruleset that sets: +# - "Require review from Code Owners" +# - "Required approving reviews" ≥ 2 +# That's a repo-admin UI setting; this file can't express it. 
+# +# What this file DOES enforce: +# - GitHub auto-requests review from a code owner when a PR touches +# a matching file. +# - If the matching review-count rule is set in the admin UI, a PR +# cannot merge without enough code-owner approvals. +# +# Listing both leads on a pattern widens the review pool — either lead +# can sign off; both don't have to (unless the admin UI rule is set). +# +# Tip: GitHub picks the LAST matching pattern in this file for a given +# file path — ordering matters. Put broad defaults at the top and +# specific high-sensitivity globs at the bottom so they win. + +# --------------------------------------------------------------------- +# Default — every file pulls in at least one lead for review. +# --------------------------------------------------------------------- +* @Cre-eD @smecsia + +# --------------------------------------------------------------------- +# CI / supply-chain — sensitive paths. Multi-reviewer gating depends +# on the branch-protection setting described in the header. Touching +# these files changes how every release is built and how secrets +# reach the runner. +# --------------------------------------------------------------------- +/.github/ @Cre-eD @smecsia +/.github/workflows/ @Cre-eD @smecsia +/.github/actions/ @Cre-eD @smecsia +/.github/dependabot.yml @Cre-eD @smecsia + +# --------------------------------------------------------------------- +# Image build context. +# --------------------------------------------------------------------- +/*.Dockerfile @Cre-eD @smecsia +/welder.yaml @Cre-eD @smecsia +/sc.sh @Cre-eD @smecsia + +# --------------------------------------------------------------------- +# Security pipeline source — code that signs, scans, attests, and +# decides what gets blocked. Tamper with this and the whole consumer +# supply chain weakens. 
+# --------------------------------------------------------------------- +/pkg/security/ @Cre-eD @smecsia + +# --------------------------------------------------------------------- +# Secrets / config plumbing. +# --------------------------------------------------------------------- +/pkg/api/secrets/ @Cre-eD @smecsia +/pkg/cmd/cmd_secrets/ @Cre-eD @smecsia + +# --------------------------------------------------------------------- +# Cloud provisioning — IAM and resource defaults that ship to every +# consumer account. +# --------------------------------------------------------------------- +/pkg/clouds/pulumi/aws/ @Cre-eD @smecsia +/pkg/clouds/pulumi/gcp/ @Cre-eD @smecsia + +# --------------------------------------------------------------------- +# Module graph — dep bumps need security review (see +# `feedback_sc_ci_tools_step` for the CI-quirk caveat). +# --------------------------------------------------------------------- +/go.mod @Cre-eD @smecsia +/go.sum @Cre-eD @smecsia +/tools.go @Cre-eD @smecsia + +# --------------------------------------------------------------------- +# This policy + ownership file itself. +# --------------------------------------------------------------------- +/SECURITY.md @Cre-eD @smecsia +/CODEOWNERS @Cre-eD @smecsia diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..ed05c455 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,78 @@ +# Security Policy + +Simple Container (`sc`) is an OSS supply-chain tool that runs in consumer +CI/CD and provisions cloud resources in customer accounts. A vulnerability +in this codebase can propagate downstream to every consumer, so we treat +all reports as high priority. + +## Supported versions + +Security fixes are issued for the **most recent calver release** (the +tag pushed by [`.github/workflows/push.yaml`][push] on every merge to +`main`) and back-ported to the latest `vYYYY.M.x` line only when the +fix is non-trivial. 
Older versions receive no patches; consumers should +pin to a recent release tag (or a SHA) and update via Dependabot / +[`integrail/devops/.github/actions/install-sc`][install-sc] (or +equivalent) on at least a monthly cadence. + +| Version | Supported | +|---|---| +| `vYYYY.M.x` latest | ✅ | +| Previous calver release on the same month line | ✅ (best-effort back-port) | +| Anything older | ❌ | + +## Reporting a vulnerability + +**Do not file a public issue.** Use one of these channels in order of +preference: + +1. **[GitHub Security Advisory][gsa]** — preferred. Private to maintainers, + integrates with CVE issuance and the GitHub-side fix workflow. +2. **Email** `security@simple-container.com` if you can't use GitHub + Security Advisories. + +Please include: + +- A description of the issue and the security impact you observed. +- The exact `sc` version (or commit SHA) affected. +- Reproduction steps or a proof of concept where possible. +- Whether you've shared the report with any third party. + +We aim to acknowledge within **3 working days** and to ship a fix or +mitigation within **30 days** for HIGH/CRITICAL findings, **90 days** +for MEDIUM, longer for LOW. We'll keep you updated and credit you in +the advisory unless you ask to remain anonymous. + +## Out of scope + +These are intentionally outside the scope of this policy because they +sit in the *consumer's* infrastructure, not in this codebase: + +- Vulnerabilities in the consumer's cloud account (IAM misconfig, etc.) + caused by how they *use* `sc`. Reach out to the relevant cloud + provider or to the consumer. +- Vulnerabilities in third-party tools `sc` invokes (`pulumi`, `cosign`, + `syft`, `trivy`, `grype`, `gcloud`, `kubectl`). Report those upstream. +- The Caddy / cloud-helpers / kubectl Docker images' *upstream* OS + packages — we re-roll on each release and the deferred CVE log is + documented in PRs at merge time. 
+ +## Hardening posture + +The repository is hardened against the relevant supply-chain risks +covered by CIS, OWASP CICD Top 10, SLSA, NIST SSDF, and the OpenSSF +Scorecard. Current control status is tracked in the hardening pipeline +(image scan, SBOM, Semgrep, Dependabot, secret scan run on every PR +and merge). For details on the threat model and the controls that +ship with each release, see the PR history. + +## Cryptographic primitives + +`sc` uses **only** primitives from the Go standard library and a small +set of audited libraries (`cosign`, `sigstore-go`). We avoid rolling +our own crypto. The local security-scan cache uses HMAC-SHA256 with a +32-byte random per-cache key for tamper detection. + +[push]: .github/workflows/push.yaml +[install-sc]: https://github.com/simple-container-com/actions/tree/main/install-sc +[gsa]: https://github.com/simple-container-com/api/security/advisories/new diff --git a/pkg/security/cache.go b/pkg/security/cache.go index 0d174558..abe497c3 100644 --- a/pkg/security/cache.go +++ b/pkg/security/cache.go @@ -1,18 +1,34 @@ package security import ( + "crypto/hmac" + "crypto/rand" "crypto/sha256" "encoding/hex" "encoding/json" + "errors" "fmt" + "io" "os" "path/filepath" + "strings" + "syscall" "time" ) -// Cache provides TTL-based caching for security operation results +// Cache provides TTL-based, HMAC-authenticated caching for security +// operation results (SBOM, image scan, signature checks). +// +// Integrity model: each entry is signed with HMAC-SHA256 using a 32-byte +// key persisted at /.hmac.key (mode 0600, generated from +// crypto/rand on first use). Tampering with the on-disk JSON, or +// substituting a file written under a different key, causes the entry +// to fail verification and be silently discarded. This replaces the +// previous mtime-based tamper-detection heuristic, which was trivially +// forgeable with `touch -t`. 
type Cache struct { baseDir string + key []byte // HMAC-SHA256 key, 32 bytes } // CacheKey uniquely identifies a cached result @@ -30,6 +46,13 @@ type CacheEntry struct { ExpiresAt time.Time `json:"expiresAt"` } +// signedEntry is the on-disk shape: the entry plus a hex-encoded +// HMAC-SHA256 of the entry's canonical JSON. +type signedEntry struct { + Entry CacheEntry `json:"entry"` + MAC string `json:"mac"` +} + // TTL durations for different operations const ( TTL_SBOM = 24 * time.Hour // SBOM: 24h @@ -37,7 +60,23 @@ const ( TTL_SCAN_TRIVY = 6 * time.Hour // Trivy scan: 6h ) -// NewCache creates a new cache instance +const ( + // hmacKeyFilename is the name of the per-cache HMAC key file. The + // leading dot lets cleanup walkers skip it cheaply by name; mode is + // 0o600 so even other processes running as the same user can't + // silently rotate the key from under us. + hmacKeyFilename = ".hmac.key" + hmacKeyLen = 32 // 256 bits, matches SHA-256 output size (RFC 2104 key-length guidance) + // tempFileGracePeriod bounds how long Clean() will leave an + // in-flight `.cache.tmp.*` / `.hmac.key.tmp.*` file alone before + // reclaiming it. A legitimate writer publishes in sub-second; + // anything older than 1h is a crashed Set() that won't return. + tempFileGracePeriod = 1 * time.Hour +) + +// NewCache creates a new cache instance, generating or loading the +// per-cache HMAC key. Returns an error if the existing key file is +// the wrong size (likely corrupted or truncated). 
func NewCache(baseDir string) (*Cache, error) { if baseDir == "" { homeDir, err := os.UserHomeDir() @@ -47,60 +86,200 @@ func NewCache(baseDir string) (*Cache, error) { baseDir = filepath.Join(homeDir, ".simple-container", "cache", "security") } - // Create cache directory if it doesn't exist if err := os.MkdirAll(baseDir, 0o700); err != nil { return nil, fmt.Errorf("creating cache directory: %w", err) } + key, err := loadOrCreateHMACKey(filepath.Join(baseDir, hmacKeyFilename)) + if err != nil { + return nil, fmt.Errorf("initializing cache HMAC key: %w", err) + } + return &Cache{ baseDir: baseDir, + key: key, }, nil } -// Get retrieves a cached result if it exists and hasn't expired -func (c *Cache) Get(key CacheKey) ([]byte, bool, error) { - path := c.getPath(key) +// loadOrCreateHMACKey reads the HMAC key from path; if absent, generates +// a new 32-byte random key and persists it via os.CreateTemp + os.Link (Rename fallback). +// +// Concurrency: a concurrent reader observes either the fully-written +// 32-byte file or no file at all — never a 0-byte mid-creation view. +// The prior O_CREATE|O_EXCL + write sequence had a narrow window where +// another reader could ReadFile a 0-byte file and fail with "corrupted" +// (codex round-1 P2). Two processes racing here each generate a key; +// only one Link wins, and the loser's reread picks up the winner's +// bytes (cryptographically equivalent — both random). 
+func loadOrCreateHMACKey(path string) ([]byte, error) { + if existing, err := readKeyFile(path); err == nil { + return existing, nil + } else if !errors.Is(err, os.ErrNotExist) { + return nil, err + } - // Check if file exists - info, err := os.Stat(path) - if os.IsNotExist(err) { - return nil, false, nil + key := make([]byte, hmacKeyLen) + if _, err := io.ReadFull(rand.Reader, key); err != nil { + return nil, fmt.Errorf("generating hmac key: %w", err) + } + + dir := filepath.Dir(path) + tmp, err := os.CreateTemp(dir, ".hmac.key.tmp.*") + if err != nil { + return nil, fmt.Errorf("creating hmac key temp: %w", err) + } + tmpPath := tmp.Name() + defer func() { _ = os.Remove(tmpPath) }() + if err := os.Chmod(tmpPath, 0o600); err != nil { + _ = tmp.Close() + return nil, fmt.Errorf("chmod hmac key temp: %w", err) + } + if _, err := tmp.Write(key); err != nil { + _ = tmp.Close() + return nil, fmt.Errorf("writing hmac key: %w", err) + } + if err := tmp.Sync(); err != nil { + _ = tmp.Close() + return nil, fmt.Errorf("syncing hmac key: %w", err) + } + if err := tmp.Close(); err != nil { + return nil, fmt.Errorf("closing hmac key temp: %w", err) + } + + // Publish strategy: + // 1. os.Link (atomic NO-REPLACE): if two NewCache calls race, + // the loser's Link fails with ErrExist and adopts the winner's + // key. This is the race-free path (codex round-2 P2). + // 2. Fallback to os.Rename when the filesystem doesn't support + // hardlinks at all — Docker Desktop volume mounts, VirtualBox + // vboxsf, some SMB shares (gemini round-3 P2). Rename is + // atomic-with-replace, so a concurrent NewCache on the same + // dir on such a filesystem can briefly diverge — accepted + // trade-off for dev-machine usability. Real production + // surface (Linux ext4/overlayFS, macOS APFS, container + // layered FS, NFSv3+) supports Link and takes the race-free + // path. + // The same-directory `dir` keeps either op a same-FS operation. 
+ if err := os.Link(tmpPath, path); err != nil { + if errors.Is(err, os.ErrExist) { + // Race lost — adopt the winner's key. + return readKeyFile(path) + } + if isLinkUnsupported(err) { + // FS doesn't do hardlinks; fall through to Rename. + if rerr := os.Rename(tmpPath, path); rerr != nil { + return nil, fmt.Errorf("placing hmac key (link unsupported, rename also failed): %w", rerr) + } + return readKeyFile(path) + } + return nil, fmt.Errorf("placing hmac key: %w", err) + } + // tmpPath is removed by the deferred cleanup; the linked path is + // the canonical key from now on. + return readKeyFile(path) +} + +// isLinkUnsupported tells whether the error from os.Link indicates a +// filesystem that rejects hardlinks (Docker Desktop bind mounts, +// vboxsf, some SMB). syscall.EPERM and syscall.ENOTSUP are the usual +// posix-y codes; syscall.EXDEV would mean cross-device (shouldn't +// happen given same-directory temp, but defensive). +func isLinkUnsupported(err error) bool { + return errors.Is(err, syscall.EPERM) || + errors.Is(err, syscall.ENOTSUP) || + errors.Is(err, syscall.EXDEV) || + errors.Is(err, syscall.EOPNOTSUPP) +} + +// readKeyFile reads and validates the HMAC key file. Returns +// os.ErrNotExist when absent. A wrong-size read is permanent +// corruption — there is no race window to retry through, because +// loadOrCreateHMACKey publishes via os.Link of a fully-written, +// fsynced temp file. The canonical path appears atomically with +// exactly hmacKeyLen bytes or not at all. 
+func readKeyFile(path string) ([]byte, error) { + existing, err := os.ReadFile(path) + if errors.Is(err, os.ErrNotExist) { + return nil, err } if err != nil { - return nil, false, fmt.Errorf("checking cache file: %w", err) + return nil, fmt.Errorf("reading hmac key: %w", err) + } + if len(existing) != hmacKeyLen { + return nil, fmt.Errorf("hmac key at %s is %d bytes, expected %d (corrupted)", + path, len(existing), hmacKeyLen) } + return existing, nil +} + +// computeMAC returns the HMAC-SHA256 of entryJSON using the cache's key. +func (c *Cache) computeMAC(entryJSON []byte) []byte { + h := hmac.New(sha256.New, c.key) + h.Write(entryJSON) + return h.Sum(nil) +} + +// Get retrieves a cached result if it exists, hasn't expired, and its +// HMAC verifies against the cache's key. Tampered or unsigned entries +// (legacy files from the pre-HMAC code path) are silently removed and +// reported as a cache miss. +func (c *Cache) Get(key CacheKey) ([]byte, bool, error) { + path := c.getPath(key) - // Read cache entry data, err := os.ReadFile(path) if err != nil { + if errors.Is(err, os.ErrNotExist) { + return nil, false, nil + } return nil, false, fmt.Errorf("reading cache file: %w", err) } - var entry CacheEntry - if err := json.Unmarshal(data, &entry); err != nil { - // Invalid cache entry, treat as cache miss + var signed signedEntry + if err := json.Unmarshal(data, &signed); err != nil { + // Unparseable — treat as miss. _ = os.Remove(path) return nil, false, nil } - // Check expiration - if time.Now().After(entry.ExpiresAt) { - // Expired, remove and return cache miss + // Reject pre-HMAC entries (no MAC field) and any zero-length MAC. + gotMAC, err := hex.DecodeString(signed.MAC) + if err != nil || len(gotMAC) == 0 { + _ = os.Remove(path) + return nil, false, nil + } + + entryJSON, err := json.Marshal(signed.Entry) + if err != nil { + // Should never happen — we just unmarshaled the same shape. 
_ = os.Remove(path) return nil, false, nil } - // Verify file modification time hasn't been tampered with - if info.ModTime().After(entry.CreatedAt.Add(1 * time.Hour)) { - // File was modified after creation, invalidate + if !hmac.Equal(gotMAC, c.computeMAC(entryJSON)) { + // Tamper detected (or written by a different key). Discard. _ = os.Remove(path) return nil, false, nil } - return entry.Data, true, nil + // MAC covers the entry content but not the location on disk. A + // valid signed file copied from path A to path B would still verify + // here, and `Get(keyB)` would return keyA's payload — bypassing the + // integrity story for SBOM/scan data. Bind the lookup to the embedded + // CacheKey and discard mismatches. + if signed.Entry.Key != key { + _ = os.Remove(path) + return nil, false, nil + } + + if time.Now().After(signed.Entry.ExpiresAt) { + _ = os.Remove(path) + return nil, false, nil + } + + return signed.Entry.Data, true, nil } -// Set stores a result in the cache with appropriate TTL +// Set stores a result in the cache with the appropriate TTL. 
func (c *Cache) Set(key CacheKey, data []byte) error { return c.SetWithTTL(key, data, c.getTTL(key.Operation)) } @@ -119,22 +298,55 @@ func (c *Cache) SetWithTTL(key CacheKey, data []byte, ttl time.Duration) error { ExpiresAt: now.Add(ttl), } - entryData, err := json.Marshal(entry) + entryJSON, err := json.Marshal(entry) if err != nil { return fmt.Errorf("marshaling cache entry: %w", err) } - path := c.getPath(key) + signed := signedEntry{ + Entry: entry, + MAC: hex.EncodeToString(c.computeMAC(entryJSON)), + } - // Create directory if it doesn't exist + out, err := json.Marshal(signed) + if err != nil { + return fmt.Errorf("marshaling signed entry: %w", err) + } + + path := c.getPath(key) dir := filepath.Dir(path) if err := os.MkdirAll(dir, 0o700); err != nil { return fmt.Errorf("creating cache directory: %w", err) } - // Write with secure permissions (0600) - if err := os.WriteFile(path, entryData, 0o600); err != nil { - return fmt.Errorf("writing cache file: %w", err) + // Atomic publish: CreateTemp in the SAME directory (so Rename is a + // same-filesystem op and stays atomic) → Chmod → Write → Sync → + // Close → Rename. Without this, a concurrent Clean() can read a + // partially-written file, fail to unmarshal, and remove it from + // under us — corrupting the cache (gemini round-1 P1). 
+ tmp, err := os.CreateTemp(dir, ".cache.tmp.*") + if err != nil { + return fmt.Errorf("creating cache temp: %w", err) + } + tmpPath := tmp.Name() + defer func() { _ = os.Remove(tmpPath) }() + if err := os.Chmod(tmpPath, 0o600); err != nil { + _ = tmp.Close() + return fmt.Errorf("chmod cache temp: %w", err) + } + if _, err := tmp.Write(out); err != nil { + _ = tmp.Close() + return fmt.Errorf("writing cache temp: %w", err) + } + if err := tmp.Sync(); err != nil { + _ = tmp.Close() + return fmt.Errorf("syncing cache temp: %w", err) + } + if err := tmp.Close(); err != nil { + return fmt.Errorf("closing cache temp: %w", err) + } + if err := os.Rename(tmpPath, path); err != nil { + return fmt.Errorf("placing cache file: %w", err) } return nil @@ -144,8 +356,8 @@ func (c *Cache) SetWithTTL(key CacheKey, data []byte, ttl time.Duration) error { func (c *Cache) Invalidate(key CacheKey) error { path := c.getPath(key) err := os.Remove(path) - if os.IsNotExist(err) { - return nil // Already gone + if errors.Is(err, os.ErrNotExist) { + return nil } if err != nil { return fmt.Errorf("removing cache file: %w", err) @@ -153,50 +365,92 @@ func (c *Cache) Invalidate(key CacheKey) error { return nil } -// Clean removes expired cache entries +// Clean removes expired or HMAC-invalid cache entries. The HMAC key +// file itself is skipped by name. func (c *Cache) Clean() error { return filepath.Walk(c.baseDir, func(path string, info os.FileInfo, err error) error { if err != nil { - return nil // Skip files we can't access + return nil } - if info.IsDir() { - return nil // Skip directories + return nil + } + if info.Name() == hmacKeyFilename { + return nil // Never touch the key. + } + // In-flight `Set()` / `loadOrCreateHMACKey` temp files: skip if + // recent (writer's Rename/Link is imminent), reclaim if stale. + // A legitimate Set takes sub-second; mtime > 1h ago is a + // crashed writer (codex round-3 P3). 
Without the grace-period + // sweep, MB-scale SBOM temps could accumulate indefinitely. + if strings.HasPrefix(info.Name(), ".cache.tmp.") || + strings.HasPrefix(info.Name(), ".hmac.key.tmp.") { + if time.Since(info.ModTime()) > tempFileGracePeriod { + _ = os.Remove(path) + } + return nil } - // Read and check expiration data, err := os.ReadFile(path) if err != nil { - return nil // Skip files we can't read + return nil + } + + var signed signedEntry + if err := json.Unmarshal(data, &signed); err != nil { + _ = os.Remove(path) + return nil + } + + gotMAC, err := hex.DecodeString(signed.MAC) + if err != nil || len(gotMAC) == 0 { + _ = os.Remove(path) + return nil } - var entry CacheEntry - if err := json.Unmarshal(data, &entry); err != nil { - // Invalid entry, remove it + entryJSON, err := json.Marshal(signed.Entry) + if err != nil || !hmac.Equal(gotMAC, c.computeMAC(entryJSON)) { _ = os.Remove(path) return nil } - // Check expiration - if time.Now().After(entry.ExpiresAt) { + // Mirror the path-to-key binding from Get: a validly-signed + // entry parked at the wrong filesystem location is also garbage. + if c.getPath(signed.Entry.Key) != path { _ = os.Remove(path) + return nil } + if time.Now().After(signed.Entry.ExpiresAt) { + _ = os.Remove(path) + } return nil }) } -// getPath returns the filesystem path for a cache key +// getPath returns the filesystem path for a cache key. +// +// CacheKey.Operation is used directly as a subdirectory name. All +// callers in-tree pass hardcoded values ("sbom", "scan-grype", +// "scan-trivy", "signature"), but defense-in-depth: a `../` in +// Operation would let a future careless caller escape baseDir +// (gemini round-3 P3). `filepath.Base` collapses any path separator +// or traversal segment to a single bare name, so even malicious +// input lands inside baseDir. 
func (c *Cache) getPath(key CacheKey) string { - // Create a deterministic filename from the key hash := sha256.New() hash.Write([]byte(key.Operation)) hash.Write([]byte(key.ImageDigest)) hash.Write([]byte(key.ConfigHash)) filename := hex.EncodeToString(hash.Sum(nil)) - // Organize by operation type - return filepath.Join(c.baseDir, key.Operation, filename+".json") + op := filepath.Base(filepath.Clean(key.Operation)) + // filepath.Base("..") returns ".." — must catch explicitly or the + // Join walks above baseDir. Same for raw separators on Windows. + if op == "" || op == "." || op == ".." || strings.ContainsAny(op, `/\`) { + op = "_unknown" + } + return filepath.Join(c.baseDir, op, filename+".json") } // getTTL returns the TTL for a given operation type @@ -207,16 +461,17 @@ func (c *Cache) getTTL(operation string) time.Duration { case "scan-grype", "scan-trivy": return TTL_SCAN_GRYPE default: - return 6 * time.Hour // Default TTL + return 6 * time.Hour } } -// Size returns the total size of the cache in bytes +// Size returns the total size of the cache in bytes (incl. the HMAC key, +// which is negligible at 32 bytes). func (c *Cache) Size() (int64, error) { var size int64 err := filepath.Walk(c.baseDir, func(path string, info os.FileInfo, err error) error { if err != nil { - return nil // Skip files we can't access + return nil } if !info.IsDir() { size += info.Size() @@ -226,7 +481,8 @@ func (c *Cache) Size() (int64, error) { return size, err } -// Clear removes all cached entries +// Clear removes all cached entries AND the HMAC key. The next NewCache +// call will generate a fresh key. 
func (c *Cache) Clear() error { return os.RemoveAll(c.baseDir) } diff --git a/pkg/security/cache_test.go b/pkg/security/cache_test.go index 8d52b7b8..1b514755 100644 --- a/pkg/security/cache_test.go +++ b/pkg/security/cache_test.go @@ -4,6 +4,7 @@ import ( "encoding/json" "os" "path/filepath" + "strings" "testing" "time" @@ -326,3 +327,382 @@ func TestCacheKeyPath(t *testing.T) { func containsSubstring(s, substr string) bool { return filepath.Base(filepath.Dir(s)) == substr || filepath.Base(s) == substr } + +// --------------------------------------------------------------------- +// HMAC integrity tests (Phase 5 — replaces the prior mtime tamper check). +// --------------------------------------------------------------------- + +func TestCacheHMACKeyPersisted(t *testing.T) { + RegisterTestingT(t) + + dir := t.TempDir() + + cache1, err := NewCache(dir) + Expect(err).ToNot(HaveOccurred()) + Expect(cache1.key).To(HaveLen(hmacKeyLen)) + + keyPath := filepath.Join(dir, hmacKeyFilename) + stat, err := os.Stat(keyPath) + Expect(err).ToNot(HaveOccurred()) + Expect(stat.Size()).To(Equal(int64(hmacKeyLen))) + Expect(stat.Mode().Perm()).To(Equal(os.FileMode(0o600))) + + // Re-open the same dir: must reuse the existing key, not regenerate. + cache2, err := NewCache(dir) + Expect(err).ToNot(HaveOccurred()) + Expect(cache2.key).To(Equal(cache1.key)) +} + +func TestCacheHMACKeyWrongSizeRejected(t *testing.T) { + RegisterTestingT(t) + + dir := t.TempDir() + // Pre-seed a truncated key file. 
+ Expect(os.WriteFile(filepath.Join(dir, hmacKeyFilename), []byte("short"), 0o600)).To(Succeed()) + + _, err := NewCache(dir) + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("hmac key")) +} + +func TestCacheTamperDetection(t *testing.T) { + RegisterTestingT(t) + + cache, err := NewCache(t.TempDir()) + Expect(err).ToNot(HaveOccurred()) + + key := CacheKey{Operation: "sbom", ImageDigest: "sha256:tamper", ConfigHash: "h"} + Expect(cache.Set(key, []byte("original"))).To(Succeed()) + + path := cache.getPath(key) + raw, err := os.ReadFile(path) + Expect(err).ToNot(HaveOccurred()) + + var sig signedEntry + Expect(json.Unmarshal(raw, &sig)).To(Succeed()) + + // Tamper the entry data; leave the MAC alone — HMAC must catch it. + sig.Entry.Data = []byte("malicious") + tampered, err := json.Marshal(sig) + Expect(err).ToNot(HaveOccurred()) + Expect(os.WriteFile(path, tampered, 0o600)).To(Succeed()) + + got, found, err := cache.Get(key) + Expect(err).ToNot(HaveOccurred()) + Expect(found).To(BeFalse(), "tampered entry must be treated as cache miss") + Expect(got).To(BeNil()) + + // The corrupt file must also be removed. + _, err = os.Stat(path) + Expect(os.IsNotExist(err)).To(BeTrue()) +} + +func TestCacheMtimeForgeryNoLongerHelpful(t *testing.T) { + RegisterTestingT(t) + + cache, err := NewCache(t.TempDir()) + Expect(err).ToNot(HaveOccurred()) + + key := CacheKey{Operation: "sbom", ImageDigest: "sha256:mtime", ConfigHash: "h"} + Expect(cache.Set(key, []byte("payload"))).To(Succeed()) + + // Advance mtime far into the future — the old code would have + // invalidated the entry here. The HMAC-based check ignores mtime, + // so the entry must remain valid. 
+ path := cache.getPath(key) + future := time.Now().Add(48 * time.Hour) + Expect(os.Chtimes(path, future, future)).To(Succeed()) + + got, found, err := cache.Get(key) + Expect(err).ToNot(HaveOccurred()) + Expect(found).To(BeTrue(), "mtime is no longer load-bearing for integrity") + Expect(string(got)).To(Equal("payload")) +} + +func TestCacheKeyMismatchRejectsEntry(t *testing.T) { + RegisterTestingT(t) + + dir := t.TempDir() + + // Create the cache, write an entry, then swap the HMAC key as if a + // different process (or the user's clearing it) had rotated the key. + c1, err := NewCache(dir) + Expect(err).ToNot(HaveOccurred()) + key := CacheKey{Operation: "sbom", ImageDigest: "sha256:k", ConfigHash: "h"} + Expect(c1.Set(key, []byte("data"))).To(Succeed()) + + // Replace the key file with random bytes of the right length. + newKey := make([]byte, hmacKeyLen) + newKey[0] = 0xff + Expect(os.WriteFile(filepath.Join(dir, hmacKeyFilename), newKey, 0o600)).To(Succeed()) + + c2, err := NewCache(dir) + Expect(err).ToNot(HaveOccurred()) + + got, found, err := c2.Get(key) + Expect(err).ToNot(HaveOccurred()) + Expect(found).To(BeFalse(), "entry signed under the previous key must fail verification") + Expect(got).To(BeNil()) +} + +func TestCacheLegacyUnsignedEntryDiscarded(t *testing.T) { + RegisterTestingT(t) + + cache, err := NewCache(t.TempDir()) + Expect(err).ToNot(HaveOccurred()) + + key := CacheKey{Operation: "sbom", ImageDigest: "sha256:legacy", ConfigHash: "h"} + path := cache.getPath(key) + Expect(os.MkdirAll(filepath.Dir(path), 0o700)).To(Succeed()) + + // Simulate a pre-HMAC entry: just the unwrapped CacheEntry shape. 
+ legacy := CacheEntry{ + Key: key, + Data: []byte("pre-hmac"), + CreatedAt: time.Now(), + ExpiresAt: time.Now().Add(time.Hour), + } + legacyBytes, err := json.Marshal(legacy) + Expect(err).ToNot(HaveOccurred()) + Expect(os.WriteFile(path, legacyBytes, 0o600)).To(Succeed()) + + got, found, err := cache.Get(key) + Expect(err).ToNot(HaveOccurred()) + Expect(found).To(BeFalse(), "legacy unsigned entries must be discarded on upgrade") + Expect(got).To(BeNil()) + + _, err = os.Stat(path) + Expect(os.IsNotExist(err)).To(BeTrue()) +} + +// Cross-key copy: a signed file from key A copied to key B's path +// would have a valid MAC but should NOT satisfy `Get(keyB)`. The +// embedded CacheKey must match the requested key. +func TestCacheCrossKeyCopyRejected(t *testing.T) { + RegisterTestingT(t) + + cache, err := NewCache(t.TempDir()) + Expect(err).ToNot(HaveOccurred()) + + keyA := CacheKey{Operation: "sbom", ImageDigest: "sha256:A", ConfigHash: "h"} + keyB := CacheKey{Operation: "sbom", ImageDigest: "sha256:B", ConfigHash: "h"} + + Expect(cache.Set(keyA, []byte("payload-A"))).To(Succeed()) + + // Copy the validly-signed file from A's path to B's path. + src, dst := cache.getPath(keyA), cache.getPath(keyB) + Expect(os.MkdirAll(filepath.Dir(dst), 0o700)).To(Succeed()) + bytes, err := os.ReadFile(src) + Expect(err).ToNot(HaveOccurred()) + Expect(os.WriteFile(dst, bytes, 0o600)).To(Succeed()) + + got, found, err := cache.Get(keyB) + Expect(err).ToNot(HaveOccurred()) + Expect(found).To(BeFalse(), "MAC alone must not bind a signed file to an arbitrary cache path") + Expect(got).To(BeNil()) + + // The misplaced copy must be removed. + _, err = os.Stat(dst) + Expect(os.IsNotExist(err)).To(BeTrue()) + + // keyA's own entry must still be intact. + got, found, err = cache.Get(keyA) + Expect(err).ToNot(HaveOccurred()) + Expect(found).To(BeTrue()) + Expect(string(got)).To(Equal("payload-A")) +} + +// Concurrent NewCache on the same fresh directory must converge on a +// single HMAC key. 
With the previous Rename-based publish, two racing +// processes could each install their own distinct key, causing each +// to invalidate the other's cache entries (codex round-2 P2). The Link +// based publish forces a winner and re-reads the on-disk truth. +func TestCacheHMACKeyConcurrentInitConverges(t *testing.T) { + RegisterTestingT(t) + + dir := t.TempDir() + const n = 8 + + keys := make(chan []byte, n) + errs := make(chan error, n) + start := make(chan struct{}) + + for i := 0; i < n; i++ { + go func() { + <-start + c, err := NewCache(dir) + if err != nil { + errs <- err + return + } + cp := make([]byte, len(c.key)) + copy(cp, c.key) + keys <- cp + }() + } + close(start) // unleash all goroutines simultaneously + + var first []byte + for i := 0; i < n; i++ { + select { + case err := <-errs: + t.Fatalf("NewCache failed: %v", err) + case k := <-keys: + if first == nil { + first = k + continue + } + Expect(k).To(Equal(first), + "goroutine %d got a different HMAC key — Link-based publish is broken", i) + } + } +} + +// Clean must not delete an in-flight `Set()` temp file — gemini-flagged +// race where Clean reads a partial file, fails to unmarshal, and removes +// it from under a still-running writer. +func TestCacheCleanSkipsInFlightTempFiles(t *testing.T) { + RegisterTestingT(t) + + dir := t.TempDir() + cache, err := NewCache(dir) + Expect(err).ToNot(HaveOccurred()) + + // Simulate an in-flight Set() by dropping a `.cache.tmp.*` file in + // the operation subdir. + opDir := filepath.Join(dir, "sbom") + Expect(os.MkdirAll(opDir, 0o700)).To(Succeed()) + cacheTmp := filepath.Join(opDir, ".cache.tmp.12345") + Expect(os.WriteFile(cacheTmp, []byte("partial bytes — not valid JSON yet"), 0o600)).To(Succeed()) + + Expect(cache.Clean()).To(Succeed()) + _, err = os.Stat(cacheTmp) + Expect(err).ToNot(HaveOccurred(), "Clean must leave .cache.tmp.* files alone") + + // Same guard for .hmac.key.tmp.* at the baseDir level. 
+ hmacTmp := filepath.Join(dir, ".hmac.key.tmp.67890") + Expect(os.WriteFile(hmacTmp, []byte("partial key bytes"), 0o600)).To(Succeed()) + Expect(cache.Clean()).To(Succeed()) + _, err = os.Stat(hmacTmp) + Expect(err).ToNot(HaveOccurred(), "Clean must leave .hmac.key.tmp.* files alone") +} + +// getPath must contain malicious Operation values that try to escape +// baseDir (gemini round-3 P3 — defense in depth; no caller is hostile +// today, but a future careless callsite shouldn't be able to corrupt +// the parent filesystem). +func TestCachePathTraversalContained(t *testing.T) { + RegisterTestingT(t) + + dir := t.TempDir() + cache, err := NewCache(dir) + Expect(err).ToNot(HaveOccurred()) + + hostile := []string{ + "../../../etc", + "..", + "/etc/passwd", + `..\..\windows`, + "a/b", + } + for _, op := range hostile { + key := CacheKey{Operation: op, ImageDigest: "sha256:x", ConfigHash: "h"} + p := cache.getPath(key) + Expect(strings.HasPrefix(p, dir+string(filepath.Separator))).To(BeTrue(), + "getPath(%q) = %q must stay under baseDir %q", op, p, dir) + Expect(strings.Contains(p, "..")).To(BeFalse(), + "getPath(%q) = %q must not contain `..`", op, p) + } +} + +// Crashed writers leave behind temp files Clean() would otherwise +// skip forever. After tempFileGracePeriod (1h), Clean reclaims them +// (codex round-3 P3). Test forces the grace by back-dating mtime. +func TestCacheCleanReclaimsStaleTemps(t *testing.T) { + RegisterTestingT(t) + + dir := t.TempDir() + cache, err := NewCache(dir) + Expect(err).ToNot(HaveOccurred()) + + opDir := filepath.Join(dir, "sbom") + Expect(os.MkdirAll(opDir, 0o700)).To(Succeed()) + + // Recent temp: must stay. Old temp: must be reclaimed. 
+ recent := filepath.Join(opDir, ".cache.tmp.recent") + stale := filepath.Join(opDir, ".cache.tmp.stale") + Expect(os.WriteFile(recent, []byte("recent"), 0o600)).To(Succeed()) + Expect(os.WriteFile(stale, []byte("stale-from-a-crashed-writer"), 0o600)).To(Succeed()) + + // Back-date stale beyond the grace period. + pastTime := time.Now().Add(-2 * tempFileGracePeriod) + Expect(os.Chtimes(stale, pastTime, pastTime)).To(Succeed()) + + Expect(cache.Clean()).To(Succeed()) + + _, err = os.Stat(recent) + Expect(err).ToNot(HaveOccurred(), "recent temp file must be left alone") + _, err = os.Stat(stale) + Expect(os.IsNotExist(err)).To(BeTrue(), "stale temp file must be reclaimed") +} + +// Set must publish atomically — the on-disk file is never a partial +// write, and the temp file is cleaned up after a successful Rename. +func TestCacheSetIsAtomic(t *testing.T) { + RegisterTestingT(t) + + dir := t.TempDir() + cache, err := NewCache(dir) + Expect(err).ToNot(HaveOccurred()) + + key := CacheKey{Operation: "sbom", ImageDigest: "sha256:atomic", ConfigHash: "h"} + bigPayload := make([]byte, 256*1024) + for i := range bigPayload { + bigPayload[i] = byte(i) + } + + Expect(cache.Set(key, bigPayload)).To(Succeed()) + + got, found, err := cache.Get(key) + Expect(err).ToNot(HaveOccurred()) + Expect(found).To(BeTrue()) + Expect(got).To(Equal(bigPayload)) + + // No leftover temp files in the op dir after a successful Set. 
+ opDir := filepath.Join(dir, "sbom") + entries, err := os.ReadDir(opDir) + Expect(err).ToNot(HaveOccurred()) + for _, e := range entries { + Expect(strings.HasPrefix(e.Name(), ".cache.tmp.")).To(BeFalse(), + "Set must clean up its temp file after Rename, found %s", e.Name()) + } +} + +func TestCacheCleanSkipsHMACKey(t *testing.T) { + RegisterTestingT(t) + + dir := t.TempDir() + cache, err := NewCache(dir) + Expect(err).ToNot(HaveOccurred()) + + keyPath := filepath.Join(dir, hmacKeyFilename) + keyBefore, err := os.ReadFile(keyPath) + Expect(err).ToNot(HaveOccurred()) + + // Drop in a corrupt entry that Clean would normally remove. + cKey := CacheKey{Operation: "sbom", ImageDigest: "sha256:c", ConfigHash: "h"} + cPath := cache.getPath(cKey) + Expect(os.MkdirAll(filepath.Dir(cPath), 0o700)).To(Succeed()) + Expect(os.WriteFile(cPath, []byte("not json"), 0o600)).To(Succeed()) + + Expect(cache.Clean()).To(Succeed()) + + // The HMAC key must survive. + keyAfter, err := os.ReadFile(keyPath) + Expect(err).ToNot(HaveOccurred()) + Expect(keyAfter).To(Equal(keyBefore)) + + // The corrupt entry must be gone. + _, err = os.Stat(cPath) + Expect(os.IsNotExist(err)).To(BeTrue()) +}