From 012dfcdb15cf931df4d85769d806564c373271a2 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 5 May 2026 23:16:35 +0000 Subject: [PATCH 1/4] ci: add CodeQL + OWASP Dependency-Check, attach SBOM and attestations to releases - codeql.yml runs the security-and-quality query suite on push, PR, and weekly cron, uploading findings to the GitHub Security tab. - dependency-check.yml runs the OWASP SCA scan on push, PR, and weekly cron. Wired via a new "security" Maven profile that fails the build on CVSS >= 7 in runtime/compile scopes; SARIF goes to the Security tab. - release.yml now collects per-module CycloneDX SBOMs from the release:perform checkout, mints actions/attest-build-provenance for the JARs and actions/attest-sbom attestations linking each SBOM to its JAR, and attaches the SBOM JSON+XML to the GitHub Release. --- .github/workflows/codeql.yml | 55 +++++++++++++++++++ .github/workflows/dependency-check.yml | 74 ++++++++++++++++++++++++++ .github/workflows/release.yml | 65 +++++++++++++++++++++- CHANGELOG.md | 10 ++++ pom.xml | 46 ++++++++++++++++ 5 files changed, 249 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/codeql.yml create mode 100644 .github/workflows/dependency-check.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..baad1b2 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,55 @@ +name: CodeQL + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + # Weekly Monday 09:30 ET (13:30 UTC, no DST drift worth caring about for a + # weekly cadence). Catches CVEs that land in CodeQL's queries between + # commits when the repo is otherwise quiet. 
+ - cron: '30 13 * * 1' + +permissions: + contents: read + security-events: write + actions: read + +jobs: + analyze: + name: Analyze (java) + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-java@v5 + with: + distribution: temurin + java-version: '17' + + - uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: maven-codeql-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-codeql- + + - uses: github/codeql-action/init@v4 + with: + languages: java-kotlin + # security-and-quality includes the security-extended pack plus + # maintainability/reliability queries — fine for a small library. + queries: security-and-quality + + # Manual build: skip tests + spotless/license to keep CodeQL extraction + # focused on compilable bytecode. The full verify matrix in ci.yml owns + # those checks. + - name: Compile + run: mvn -B -ntp -DskipTests -Dspotless.check.skip=true -Dlicense.skip=true package + + - uses: github/codeql-action/analyze@v4 + with: + category: /language:java-kotlin diff --git a/.github/workflows/dependency-check.yml b/.github/workflows/dependency-check.yml new file mode 100644 index 0000000..c8f2cf6 --- /dev/null +++ b/.github/workflows/dependency-check.yml @@ -0,0 +1,74 @@ +name: Dependency-Check + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + # Weekly Tuesday 09:30 ET (13:30 UTC). Offset from CodeQL's Monday cron so + # a slow NVD API morning doesn't pile both onto the same hour. 
+ - cron: '30 13 * * 2' + +permissions: + contents: read + security-events: write + actions: read + +jobs: + scan: + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-java@v5 + with: + distribution: temurin + java-version: '17' + + - uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: maven-depcheck-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-depcheck- + + # NVD data cache: dependency-check stores the NVD feed under + # ~/.m2/repository/org/owasp/dependency-check-data/. Cache it across + # runs so we only pay full-download cost on the weekly cron. + - uses: actions/cache@v5 + with: + path: ~/.m2/repository/org/owasp/dependency-check-data + key: depcheck-nvd-${{ github.run_id }} + restore-keys: | + depcheck-nvd- + + - name: Run dependency-check + run: | + mvn -B -ntp -P security \ + -DskipTests=true \ + -Dspotless.check.skip=true \ + -Dlicense.skip=true \ + ${NVD_API_KEY:+-DnvdApiKey=$NVD_API_KEY} \ + verify + env: + # Optional: set NVD_API_KEY in repo secrets to lift NVD rate limits. + # Without it, the job still works but is slower on cold caches. 
+ NVD_API_KEY: ${{ secrets.NVD_API_KEY }} + + - name: Upload SARIF to GitHub Security tab + if: always() + uses: github/codeql-action/upload-sarif@v4 + with: + sarif_file: target/dependency-check/dependency-check-report.sarif + category: dependency-check + + - name: Upload HTML/JSON report + if: always() + uses: actions/upload-artifact@v7 + with: + name: dependency-check-report + path: target/dependency-check/ + retention-days: 14 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bda5080..325b88d 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -76,6 +76,11 @@ jobs: needs: verify permissions: contents: write + # id-token + attestations needed by actions/attest-build-provenance and + # actions/attest-sbom — these mint sigstore-backed attestations against + # the GitHub OIDC token and write them to the repo's attestation log. + id-token: write + attestations: write runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -116,6 +121,59 @@ jobs: OSSRH_TOKEN: ${{ secrets.OSSRH_TOKEN }} MAVEN_GPG_PASSPHRASE: ${{ secrets.MAVEN_GPG_PASSPHRASE }} + # release:perform checks out the tagged code into target/checkout/ and runs + # the central+sign profiles (see <releaseProfiles> in pom.xml). The + # cyclonedx plugin (bound to the central profile, package phase) drops + # bom.{json,xml} into each module's target/. Collect them under a + # versioned name so consumers downloading from the GH Release can tell + # which module/version the SBOM corresponds to. 
+ - name: Collect release artifacts (JARs + SBOMs) + id: collect + run: | + set -euo pipefail + RELEASE_VERSION="${RELEASE_TAG#v}" + echo "RELEASE_VERSION=${RELEASE_VERSION}" >> "$GITHUB_ENV" + mkdir -p release-assets + # SBOMs — one per module, both formats + for module in magika-java magika-java-tika; do + for fmt in json xml; do + src="target/checkout/${module}/target/bom.${fmt}" + if [ -f "$src" ]; then + cp "$src" "release-assets/${module}-${RELEASE_VERSION}-cyclonedx.${fmt}" + else + echo "::warning::SBOM not found at $src" + fi + done + done + # Main JARs (for build-provenance attestation; sources/javadoc skipped + # — attesting the primary artifact is what consumers verify against). + for module in magika-java magika-java-tika; do + jar="target/checkout/${module}/target/${module}-${RELEASE_VERSION}.jar" + if [ -f "$jar" ]; then + cp "$jar" "release-assets/" + else + echo "::warning::JAR not found at $jar" + fi + done + ls -la release-assets/ + + - name: Attest build provenance for JARs + uses: actions/attest-build-provenance@v3 + with: + subject-path: 'release-assets/*.jar' + + - name: Attest magika-java SBOM + uses: actions/attest-sbom@v3 + with: + subject-path: 'release-assets/magika-java-${{ env.RELEASE_VERSION }}.jar' + sbom-path: 'release-assets/magika-java-${{ env.RELEASE_VERSION }}-cyclonedx.json' + + - name: Attest magika-java-tika SBOM + uses: actions/attest-sbom@v3 + with: + subject-path: 'release-assets/magika-java-tika-${{ env.RELEASE_VERSION }}.jar' + sbom-path: 'release-assets/magika-java-tika-${{ env.RELEASE_VERSION }}-cyclonedx.json' + - name: Extract CHANGELOG section + create GitHub Release run: | RELEASE_VERSION="${RELEASE_TAG#v}" @@ -168,10 +226,15 @@ jobs: # Use --notes-file (NOT --notes) — CHANGELOG content can contain markdown # that breaks shell-quoting if passed as a string. 
+ # SBOMs are attached as release assets so consumers who don't pull from + # Maven Central (or who want the per-module SBOMs at a glance) can + # download them directly. JARs live on Central — no need to duplicate. gh release create "${RELEASE_TAG}" \ --title "${RELEASE_TAG}" \ --notes-file release-notes.md \ - --latest + --latest \ + release-assets/*-cyclonedx.json \ + release-assets/*-cyclonedx.xml env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e21664..f0527fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `VerificationReason`. - Optional `dev.jcputney:magika-java-tika` artifact with an embedded-ONNX Apache Tika `Detector` adapter and service-loader registration. +- GitHub CodeQL static analysis workflow (`.github/workflows/codeql.yml`) running on + push, pull request, and weekly cron with the `security-and-quality` query suite. +- OWASP Dependency-Check SCA scan (`.github/workflows/dependency-check.yml`) running + on push, pull request, and weekly cron. Wired via a `security` Maven profile that + fails the build on CVSS ≥ 7 in runtime/compile scopes; SARIF report is uploaded to + the GitHub Security tab. +- Release workflow now attaches per-module CycloneDX SBOMs (JSON + XML) to the GitHub + Release and generates Sigstore-backed `actions/attest-build-provenance` attestations + for the published JARs plus `actions/attest-sbom` attestations linking each SBOM to + its JAR. 
### Changed diff --git a/pom.xml b/pom.xml index aa0d2fb..469a2cb 100644 --- a/pom.xml +++ b/pom.xml @@ -73,6 +73,7 @@ 3.3.1 0.10.0 2.9.1 + 12.1.0 @@ -344,6 +345,51 @@ + + security + + + + + org.owasp + dependency-check-maven + ${version.plugin.dependency-check} + + + 7 + + HTML + SARIF + JSON + + + true + true + true + + ${project.build.directory}/dependency-check + + + + dependency-check + verify + + aggregate + + + + + + + github From 24c25b9a5ee1dd92edabbc41299f6e9dbc951f38 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 5 May 2026 23:23:13 +0000 Subject: [PATCH 2/4] ci(release): fail fast on missing SBOM/JAR instead of warning + glob expansion Previously the collect step emitted only ::warning:: when an expected SBOM or JAR was absent, then `gh release create release-assets/*-cyclonedx.{json,xml}` would receive an unexpanded glob and fail with a confusing "no such file" error several steps later. Now any missing artifact is an ::error:: with a path-precise message and the step exits non-zero before attestation/release creation runs. --- .github/workflows/release.yml | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 325b88d..12bb2e6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -134,14 +134,19 @@ jobs: RELEASE_VERSION="${RELEASE_TAG#v}" echo "RELEASE_VERSION=${RELEASE_VERSION}" >> "$GITHUB_ENV" mkdir -p release-assets - # SBOMs — one per module, both formats + missing=0 + # SBOMs — one per module, both formats. ALL are required: a missing + # SBOM means the central profile didn't run cyclonedx (broken release + # config) and we'd rather fail here with a precise path than later + # when `gh release create` chokes on an unexpanded glob. 
for module in magika-java magika-java-tika; do for fmt in json xml; do src="target/checkout/${module}/target/bom.${fmt}" if [ -f "$src" ]; then cp "$src" "release-assets/${module}-${RELEASE_VERSION}-cyclonedx.${fmt}" else - echo "::warning::SBOM not found at $src" + echo "::error::Expected SBOM not found at $src — did the central profile run cyclonedx?" + missing=1 fi done done @@ -152,9 +157,14 @@ jobs: if [ -f "$jar" ]; then cp "$jar" "release-assets/" else - echo "::warning::JAR not found at $jar" + echo "::error::Expected JAR not found at $jar" + missing=1 fi done + if [ "$missing" -ne 0 ]; then + echo "::error::Required release artifacts are missing (see errors above); aborting before GitHub Release" + exit 1 + fi ls -la release-assets/ - name: Attest build provenance for JARs From 9f7ea7e7ec66a696d2c4666f970f08c7ff1477eb Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 5 May 2026 23:24:13 +0000 Subject: [PATCH 3/4] ci(depcheck): key NVD cache by ISO week instead of run_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `github.run_id` made every workflow run create a brand-new cache entry, causing storage churn and eventual eviction of older entries. The restore-keys prefix still pulled in the prior cache, so functionally it worked, but it wasted GitHub Actions cache quota. Switch to a stable per-OS key bucketed by ISO year-week (`%G-%V`): PR/push runs within the same week share one cache entry, and the first run of each ISO week (the Tuesday cron at the latest) rolls to a fresh key — refreshing the NVD feed without manual cache-busting. 
--- .github/workflows/dependency-check.yml | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/.github/workflows/dependency-check.yml b/.github/workflows/dependency-check.yml index c8f2cf6..16119ee 100644 --- a/.github/workflows/dependency-check.yml +++ b/.github/workflows/dependency-check.yml @@ -36,14 +36,21 @@ jobs: maven-depcheck- # NVD data cache: dependency-check stores the NVD feed under - # ~/.m2/repository/org/owasp/dependency-check-data/. Cache it across - # runs so we only pay full-download cost on the weekly cron. + # ~/.m2/repository/org/owasp/dependency-check-data/. Key on ISO + # year-week (`%G-%V`) so PR/push runs in the same week share one + # entry; the weekly cron naturally rolls to a new key. Without this, + # using `github.run_id` would create a fresh cache every run and + # cause cache churn / storage bloat. + - name: Compute weekly cache bucket + id: cachekey + run: echo "week=$(date -u +%G-%V)" >> "$GITHUB_OUTPUT" + - uses: actions/cache@v5 with: path: ~/.m2/repository/org/owasp/dependency-check-data - key: depcheck-nvd-${{ github.run_id }} + key: depcheck-nvd-${{ runner.os }}-${{ steps.cachekey.outputs.week }} restore-keys: | - depcheck-nvd- + depcheck-nvd-${{ runner.os }}- - name: Run dependency-check run: | From 4834228400f6c1a00d7be3686b1089485158365b Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 5 May 2026 23:36:21 +0000 Subject: [PATCH 4/4] ci(depcheck): scope plugin to root + relocate NVD cache off ~/.m2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues with the previous configuration: 1. The dependency-check execution sat in a profile under <build><plugins> in the parent pom, which means children inherited it. Each child would invoke the `aggregate` goal during its own verify phase — redundant at best, and `aggregate` is a reactor-root goal that can fail in non- aggregator contexts. Marking the plugin <inherited>false</inherited> keeps it scoped to the parent reactor. 2. 
The workflow cached ~/.m2/repository AND ~/.m2/repository/org/owasp/dependency-check-data — overlapping paths, causing redundant storage and unpredictable restore semantics. Relocate the plugin's dataDirectory to ~/.cache/dependency-check-data so the NVD cache path is disjoint from the maven dependency cache. --- .github/workflows/dependency-check.yml | 14 +++++++------- pom.xml | 9 +++++++++ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/.github/workflows/dependency-check.yml b/.github/workflows/dependency-check.yml index 16119ee..cf08ec5 100644 --- a/.github/workflows/dependency-check.yml +++ b/.github/workflows/dependency-check.yml @@ -35,19 +35,19 @@ jobs: restore-keys: | maven-depcheck- - # NVD data cache: dependency-check stores the NVD feed under - # ~/.m2/repository/org/owasp/dependency-check-data/. Key on ISO - # year-week (`%G-%V`) so PR/push runs in the same week share one - # entry; the weekly cron naturally rolls to a new key. Without this, - # using `github.run_id` would create a fresh cache every run and - # cause cache churn / storage bloat. + # NVD data cache: pom.xml relocates the dependency-check dataDirectory + # to ~/.cache/dependency-check-data so this cache path is disjoint from + # the ~/.m2/repository cache above (overlapping paths cause redundant + # storage and unpredictable restores). Key on ISO year-week (`%G-%V`) + # so PR/push runs share one entry per week; the first run of each ISO + # week (the Tuesday cron at the latest) rolls the key, refreshing NVD data. 
- name: Compute weekly cache bucket id: cachekey run: echo "week=$(date -u +%G-%V)" >> "$GITHUB_OUTPUT" - uses: actions/cache@v5 with: - path: ~/.m2/repository/org/owasp/dependency-check-data + path: ~/.cache/dependency-check-data key: depcheck-nvd-${{ runner.os }}-${{ steps.cachekey.outputs.week }} restore-keys: | depcheck-nvd-${{ runner.os }}- diff --git a/pom.xml b/pom.xml index 469a2cb..71ef370 100644 --- a/pom.xml +++ b/pom.xml @@ -358,6 +358,11 @@ org.owasp dependency-check-maven ${version.plugin.dependency-check} + + false ${project.build.directory}/dependency-check + + ${user.home}/.cache/dependency-check-data