diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..baad1b2 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,55 @@ +name: CodeQL + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + # Weekly Monday 13:30 UTC (09:30 EDT / 08:30 EST; the DST hour isn't worth + # chasing for a weekly cadence). Picks up checks newly added to CodeQL's + # queries between commits when the repo is otherwise quiet. + - cron: '30 13 * * 1' + +permissions: + contents: read + security-events: write + actions: read + +jobs: + analyze: + name: Analyze (java) + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-java@v5 + with: + distribution: temurin + java-version: '17' + + - uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: maven-codeql-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-codeql- + + - uses: github/codeql-action/init@v4 + with: + languages: java-kotlin + # security-and-quality includes the security-extended pack plus + # maintainability/reliability queries — fine for a small library. + queries: security-and-quality + + # Manual build: skip tests + spotless/license to keep CodeQL extraction + # focused on compilable bytecode. The full verify matrix in ci.yml owns + # those checks. + - name: Compile + run: mvn -B -ntp -DskipTests -Dspotless.check.skip=true -Dlicense.skip=true package + + - uses: github/codeql-action/analyze@v4 + with: + category: /language:java-kotlin diff --git a/.github/workflows/dependency-check.yml b/.github/workflows/dependency-check.yml new file mode 100644 index 0000000..cf08ec5 --- /dev/null +++ b/.github/workflows/dependency-check.yml @@ -0,0 +1,81 @@ +name: Dependency-Check + +on: + push: + branches: [main] + pull_request: + branches: [main] + schedule: + # Weekly Tuesday 13:30 UTC (09:30 EDT / 08:30 EST). Offset from CodeQL's + # Monday cron so a slow NVD API morning doesn't stall both scans at once. 
+ - cron: '30 13 * * 2' + +permissions: + contents: read + security-events: write + actions: read + +jobs: + scan: + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - uses: actions/checkout@v6 + + - uses: actions/setup-java@v5 + with: + distribution: temurin + java-version: '17' + + - uses: actions/cache@v5 + with: + path: ~/.m2/repository + key: maven-depcheck-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-depcheck- + + # NVD data cache: pom.xml relocates the dependency-check dataDirectory + # to ~/.cache/dependency-check-data so this cache path is disjoint from + # the ~/.m2/repository cache above (overlapping paths cause redundant + # storage and unpredictable restores). Key on ISO year-week (`%G-%V`): + # runs within a week share one entry; the first run of a new week misses, + # restores the newest older entry via restore-keys, and saves a fresh one. + - name: Compute weekly cache bucket + id: cachekey + run: echo "week=$(date -u +%G-%V)" >> "$GITHUB_OUTPUT" + + - uses: actions/cache@v5 + with: + path: ~/.cache/dependency-check-data + key: depcheck-nvd-${{ runner.os }}-${{ steps.cachekey.outputs.week }} + restore-keys: | + depcheck-nvd-${{ runner.os }}- + + - name: Run dependency-check + run: | + mvn -B -ntp -P security \ + -DskipTests=true \ + -Dspotless.check.skip=true \ + -Dlicense.skip=true \ + ${NVD_API_KEY:+-DnvdApiKey=$NVD_API_KEY} \ + verify + env: + # Optional: set NVD_API_KEY in repo secrets to lift NVD rate limits. + # Without it, the job still works but is slower on cold caches. 
+ NVD_API_KEY: ${{ secrets.NVD_API_KEY }} + + - name: Upload SARIF to GitHub Security tab + if: always() + uses: github/codeql-action/upload-sarif@v4 + with: + sarif_file: target/dependency-check/dependency-check-report.sarif + category: dependency-check + + - name: Upload HTML/JSON report + if: always() + uses: actions/upload-artifact@v7 + with: + name: dependency-check-report + path: target/dependency-check/ + retention-days: 14 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index bda5080..12bb2e6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -76,6 +76,11 @@ jobs: needs: verify permissions: contents: write + # id-token + attestations needed by actions/attest-build-provenance and + # actions/attest-sbom — these mint sigstore-backed attestations against + # the GitHub OIDC token and write them to the repo's attestation log. + id-token: write + attestations: write runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -116,6 +121,69 @@ jobs: OSSRH_TOKEN: ${{ secrets.OSSRH_TOKEN }} MAVEN_GPG_PASSPHRASE: ${{ secrets.MAVEN_GPG_PASSPHRASE }} + # release:perform checks the tagged code into target/checkout/ and runs + # the central+sign profiles (see the release plugin config in pom.xml). + # The cyclonedx plugin (bound to the central profile, package phase) drops + # bom.{json,xml} into each module's target/. Collect them under a + # versioned name so consumers downloading from the GH Release can tell + # which module/version the SBOM corresponds to. + - name: Collect release artifacts (JARs + SBOMs) + id: collect + run: | + set -euo pipefail + RELEASE_VERSION="${RELEASE_TAG#v}" + echo "RELEASE_VERSION=${RELEASE_VERSION}" >> "$GITHUB_ENV" + mkdir -p release-assets + missing=0 + # SBOMs — one per module, both formats. 
ALL are required: a missing + # SBOM means the central profile didn't run cyclonedx (broken release + # config) and we'd rather fail here with a precise path than later + # when `gh release create` chokes on an unexpanded glob. + for module in magika-java magika-java-tika; do + for fmt in json xml; do + src="target/checkout/${module}/target/bom.${fmt}" + if [ -f "$src" ]; then + cp "$src" "release-assets/${module}-${RELEASE_VERSION}-cyclonedx.${fmt}" + else + echo "::error::Expected SBOM not found at $src — did the central profile run cyclonedx?" + missing=1 + fi + done + done + # Main JARs — subjects of the provenance + SBOM attestations (sources and + # javadoc skipped: consumers verify against the primary artifact). + for module in magika-java magika-java-tika; do + jar="target/checkout/${module}/target/${module}-${RELEASE_VERSION}.jar" + if [ -f "$jar" ]; then + cp "$jar" "release-assets/" + else + echo "::error::Expected JAR not found at $jar" + missing=1 + fi + done + if [ "$missing" -ne 0 ]; then + echo "::error::Required release artifacts are missing (see errors above); aborting before GitHub Release" + exit 1 + fi + ls -la release-assets/ + + - name: Attest build provenance for JARs + uses: actions/attest-build-provenance@v3 + with: + subject-path: 'release-assets/*.jar' + + - name: Attest magika-java SBOM + uses: actions/attest-sbom@v3 + with: + subject-path: 'release-assets/magika-java-${{ env.RELEASE_VERSION }}.jar' + sbom-path: 'release-assets/magika-java-${{ env.RELEASE_VERSION }}-cyclonedx.json' + + - name: Attest magika-java-tika SBOM + uses: actions/attest-sbom@v3 + with: + subject-path: 'release-assets/magika-java-tika-${{ env.RELEASE_VERSION }}.jar' + sbom-path: 'release-assets/magika-java-tika-${{ env.RELEASE_VERSION }}-cyclonedx.json' + - name: Extract CHANGELOG section + create GitHub Release run: | RELEASE_VERSION="${RELEASE_TAG#v}" @@ -168,10 +236,15 @@ jobs: # Use --notes-file (NOT --notes) — CHANGELOG content can contain markdown # that 
breaks shell-quoting if passed as a string. + # SBOMs are attached as release assets so consumers who don't pull from + # Maven Central (or who want each module's SBOM at a glance) can + # download them directly. JARs live on Central — no need to duplicate. gh release create "${RELEASE_TAG}" \ --title "${RELEASE_TAG}" \ --notes-file release-notes.md \ - --latest + --latest \ + release-assets/*-cyclonedx.json \ + release-assets/*-cyclonedx.xml env: GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 1e21664..f0527fa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `VerificationReason`. - Optional `dev.jcputney:magika-java-tika` artifact with an embedded-ONNX Apache Tika `Detector` adapter and service-loader registration. +- GitHub CodeQL static analysis workflow (`.github/workflows/codeql.yml`) running on + push, pull request, and weekly cron with the `security-and-quality` query suite. +- OWASP Dependency-Check SCA scan (`.github/workflows/dependency-check.yml`) running + on push, pull request, and weekly cron. Wired via a `security` Maven profile that + fails the build on CVSS ≥ 7 in runtime/compile scopes; SARIF report is uploaded to + the GitHub Security tab. +- Release workflow now attaches per-module CycloneDX SBOMs (JSON + XML) to the GitHub + Release and generates Sigstore-backed `actions/attest-build-provenance` attestations + for the published JARs plus `actions/attest-sbom` attestations linking each SBOM to + its JAR. 
### Changed diff --git a/pom.xml b/pom.xml index aa0d2fb..71ef370 100644 --- a/pom.xml +++ b/pom.xml @@ -73,6 +73,7 @@ 3.3.1 0.10.0 2.9.1 + 12.1.0 @@ -344,6 +345,60 @@ + + security + + + + + org.owasp + dependency-check-maven + ${version.plugin.dependency-check} + + false + + + 7 + + HTML + SARIF + JSON + + + true + true + true + + ${project.build.directory}/dependency-check + + ${user.home}/.cache/dependency-check-data + + + + dependency-check + verify + + aggregate + + + + + + + github