Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
name: CodeQL

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  schedule:
    # Every Monday at 13:30 UTC (09:30 ET; the DST shift is not worth chasing
    # for a weekly job). Keeps analysis running while the repo is quiet, so
    # findings from newly shipped CodeQL queries still surface between commits.
    - cron: "30 13 * * 1"

permissions:
  actions: read
  contents: read
  security-events: write

jobs:
  analyze:
    name: Analyze (java)
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - uses: actions/checkout@v6

      - uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: "17"

      # Maven repository cache, keyed on the content of every pom.xml.
      - uses: actions/cache@v5
        with:
          path: ~/.m2/repository
          key: maven-codeql-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            maven-codeql-

      - uses: github/codeql-action/init@v4
        with:
          languages: java-kotlin
          # The security-and-quality suite covers the security-extended pack
          # and adds maintainability/reliability queries, which is acceptable
          # overhead for a small library.
          queries: security-and-quality

      # Hand-written build step: tests and the spotless/license checks are
      # switched off so CodeQL extraction only has to observe compilation.
      # Those checks are owned by the full verify matrix in ci.yml.
      - name: Compile
        run: mvn -B -ntp -DskipTests -Dspotless.check.skip=true -Dlicense.skip=true package

      - uses: github/codeql-action/analyze@v4
        with:
          category: /language:java-kotlin
81 changes: 81 additions & 0 deletions .github/workflows/dependency-check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
name: Dependency-Check

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  schedule:
    # Weekly Tuesday 09:30 ET (13:30 UTC). Offset from CodeQL's Monday cron so
    # a slow NVD API morning doesn't pile both onto the same hour.
    - cron: '30 13 * * 2'

permissions:
  contents: read
  security-events: write
  actions: read

jobs:
  scan:
    runs-on: ubuntu-latest
    timeout-minutes: 30

    steps:
      - uses: actions/checkout@v6

      - uses: actions/setup-java@v5
        with:
          distribution: temurin
          java-version: '17'

      # Maven repository cache, keyed on the content of every pom.xml.
      - uses: actions/cache@v5
        with:
          path: ~/.m2/repository
          key: maven-depcheck-${{ hashFiles('**/pom.xml') }}
          restore-keys: |
            maven-depcheck-

      # NVD data cache: pom.xml relocates the dependency-check dataDirectory
      # to ~/.cache/dependency-check-data so this cache path is disjoint from
      # the ~/.m2/repository cache above (overlapping paths cause redundant
      # storage and unpredictable restores). Key on ISO year-week (`%G-%V`)
      # so PR/push runs share one entry within the week; the weekly cron
      # naturally rolls to a new key, refreshing the NVD feed.
      - name: Compute weekly cache bucket
        id: cachekey
        run: echo "week=$(date -u +%G-%V)" >> "$GITHUB_OUTPUT"

      - uses: actions/cache@v5
        with:
          path: ~/.cache/dependency-check-data
          key: depcheck-nvd-${{ runner.os }}-${{ steps.cachekey.outputs.week }}
          restore-keys: |
            depcheck-nvd-${{ runner.os }}-

      # The ${VAR:+...} expansion adds -DnvdApiKey only when the secret is
      # set. The inner quotes keep the flag a single argument even if the
      # value ever contains whitespace or glob characters.
      - name: Run dependency-check
        run: |
          mvn -B -ntp -P security \
            -DskipTests=true \
            -Dspotless.check.skip=true \
            -Dlicense.skip=true \
            ${NVD_API_KEY:+"-DnvdApiKey=${NVD_API_KEY}"} \
            verify
        env:
          # Optional: set NVD_API_KEY in repo secrets to lift NVD rate limits.
          # Without it, the job still works but is slower on cold caches.
          NVD_API_KEY: ${{ secrets.NVD_API_KEY }}

      # Upload even when the scan step failed (e.g. the CVSS gate tripped),
      # but only if a SARIF file was actually produced. If Maven died before
      # the report was written, skipping the upload avoids a second
      # "file not found" failure that would obscure the real error.
      # !cancelled() is used instead of always() so a cancelled run stops.
      - name: Upload SARIF to GitHub Security tab
        if: ${{ !cancelled() && hashFiles('target/dependency-check/dependency-check-report.sarif') != '' }}
        uses: github/codeql-action/upload-sarif@v4
        with:
          sarif_file: target/dependency-check/dependency-check-report.sarif
          category: dependency-check

      - name: Upload HTML/JSON report
        if: ${{ !cancelled() }}
        uses: actions/upload-artifact@v7
        with:
          name: dependency-check-report
          path: target/dependency-check/
          retention-days: 14
75 changes: 74 additions & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,11 @@ jobs:
needs: verify
permissions:
contents: write
# id-token + attestations needed by actions/attest-build-provenance and
# actions/attest-sbom — these mint sigstore-backed attestations against
# the GitHub OIDC token and write them to the repo's attestation log.
id-token: write
attestations: write
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
Expand Down Expand Up @@ -116,6 +121,69 @@ jobs:
OSSRH_TOKEN: ${{ secrets.OSSRH_TOKEN }}
MAVEN_GPG_PASSPHRASE: ${{ secrets.MAVEN_GPG_PASSPHRASE }}

# release:perform checks the tagged code out into target/checkout/ and builds
# it with the central+sign profiles (see <releaseProfiles> in pom.xml). The
# cyclonedx plugin, bound to the central profile's package phase, writes
# bom.{json,xml} into every module's target/. This step stages those files
# under names carrying module and version, so anyone downloading from the
# GH Release can tell what each SBOM describes.
- name: Collect release artifacts (JARs + SBOMs)
  id: collect
  run: |
    set -euo pipefail
    RELEASE_VERSION="${RELEASE_TAG#v}"
    echo "RELEASE_VERSION=${RELEASE_VERSION}" >> "$GITHUB_ENV"
    mkdir -p release-assets
    missing=0

    # stage SRC DEST MESSAGE
    # Copies SRC to DEST when SRC is a regular file. Otherwise prints MESSAGE
    # and records the gap in $missing, so every absent file is reported
    # before the step aborts.
    stage() {
      if [ -f "$1" ]; then
        cp "$1" "$2"
      else
        echo "$3"
        missing=1
      fi
    }

    # SBOMs: both formats for every module, all mandatory. An absent SBOM
    # means the central profile never ran cyclonedx (broken release config);
    # failing here with the exact path beats a later `gh release create`
    # choking on an unexpanded glob.
    for module in magika-java magika-java-tika; do
      for fmt in json xml; do
        sbom="target/checkout/${module}/target/bom.${fmt}"
        stage "$sbom" \
          "release-assets/${module}-${RELEASE_VERSION}-cyclonedx.${fmt}" \
          "::error::Expected SBOM not found at $sbom — did the central profile run cyclonedx?"
      done
    done

    # Primary JARs, used for the build-provenance attestation. Sources and
    # javadoc JARs are left out: consumers verify against the main artifact.
    for module in magika-java magika-java-tika; do
      artifact="target/checkout/${module}/target/${module}-${RELEASE_VERSION}.jar"
      stage "$artifact" "release-assets/" "::error::Expected JAR not found at $artifact"
    done

    [ "$missing" -eq 0 ] || {
      echo "::error::Required release artifacts are missing (see errors above); aborting before GitHub Release"
      exit 1
    }
    ls -la release-assets/

# Provenance attestation covering every JAR staged in release-assets/.
- name: Attest build provenance for JARs
  uses: actions/attest-build-provenance@v3
  with:
    subject-path: "release-assets/*.jar"

# One SBOM attestation per module: each pairs the module's JAR (subject)
# with its CycloneDX JSON document. RELEASE_VERSION is read from the job
# environment.
- name: Attest magika-java SBOM
  uses: actions/attest-sbom@v3
  with:
    subject-path: "release-assets/magika-java-${{ env.RELEASE_VERSION }}.jar"
    sbom-path: "release-assets/magika-java-${{ env.RELEASE_VERSION }}-cyclonedx.json"

- name: Attest magika-java-tika SBOM
  uses: actions/attest-sbom@v3
  with:
    subject-path: "release-assets/magika-java-tika-${{ env.RELEASE_VERSION }}.jar"
    sbom-path: "release-assets/magika-java-tika-${{ env.RELEASE_VERSION }}-cyclonedx.json"

- name: Extract CHANGELOG section + create GitHub Release
run: |
RELEASE_VERSION="${RELEASE_TAG#v}"
Expand Down Expand Up @@ -168,10 +236,15 @@ jobs:

# Use --notes-file (NOT --notes) — CHANGELOG content can contain markdown
# that breaks shell-quoting if passed as a string.
# SBOMs are attached as release assets so consumers who don't pull from
# Maven Central can download the per-module SBOMs directly from the GitHub
# Release. JARs live on Central — no need to duplicate.
gh release create "${RELEASE_TAG}" \
--title "${RELEASE_TAG}" \
--notes-file release-notes.md \
--latest
--latest \
release-assets/*-cyclonedx.json \
release-assets/*-cyclonedx.xml
Comment on lines +239 to +247
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
`VerificationReason`.
- Optional `dev.jcputney:magika-java-tika` artifact with an embedded-ONNX Apache Tika
`Detector` adapter and service-loader registration.
- GitHub CodeQL static analysis workflow (`.github/workflows/codeql.yml`) running on
push, pull request, and weekly cron with the `security-and-quality` query suite.
- OWASP Dependency-Check SCA scan (`.github/workflows/dependency-check.yml`) running
on push, pull request, and weekly cron. Wired via a `security` Maven profile that
fails the build on CVSS ≥ 7 in runtime/compile scopes; SARIF report is uploaded to
the GitHub Security tab.
- Release workflow now attaches per-module CycloneDX SBOMs (JSON + XML) to the GitHub
Release and generates Sigstore-backed `actions/attest-build-provenance` attestations
for the published JARs plus `actions/attest-sbom` attestations linking each SBOM to
its JAR.

### Changed

Expand Down
55 changes: 55 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
<version.plugin.release>3.3.1</version.plugin.release>
<version.plugin.central-publishing>0.10.0</version.plugin.central-publishing>
<version.plugin.cyclonedx>2.9.1</version.plugin.cyclonedx>
<version.plugin.dependency-check>12.1.0</version.plugin.dependency-check>
</properties>

<dependencyManagement>
Expand Down Expand Up @@ -344,6 +345,60 @@
</plugins>
</build>
</profile>
<profile>
  <id>security</id>
  <!-- OWASP Dependency-Check (SCA), kept behind an opt-in profile: the NVD
       feed is a heavy download (~hundreds of MB) and slow to refresh.
       CI drives it from .github/workflows/dependency-check.yml. To run it
       locally:
         mvn -P security verify -DnvdApiKey=...
       An NVD API key is optional, but without one the run is rate-limited
       hard. -->
  <build>
    <plugins>
      <plugin>
        <groupId>org.owasp</groupId>
        <artifactId>dependency-check-maven</artifactId>
        <version>${version.plugin.dependency-check}</version>
        <!-- The aggregate goal belongs to the reactor root. If this
             declaration were inherited, every child module would invoke
             it again in its own verify phase, which is redundant at best
             and can fail outside the aggregator execution context. -->
        <inherited>false</inherited>
        <executions>
          <execution>
            <id>dependency-check</id>
            <phase>verify</phase>
            <goals>
              <goal>aggregate</goal>
            </goals>
          </execution>
        </executions>
        <configuration>
          <!-- Gate on High/Critical only (CVSS >= 7.0). Medium findings
               still appear in the report but do not break the build, which
               keeps the signal/noise ratio reasonable for a small
               library. -->
          <failBuildOnCVSS>7</failBuildOnCVSS>
          <formats>
            <format>HTML</format>
            <format>SARIF</format>
            <format>JSON</format>
          </formats>
          <!-- Dev/test scopes are excluded: CVEs in junit/assertj/archunit
               never reach consumers. -->
          <skipTestScope>true</skipTestScope>
          <skipProvidedScope>true</skipProvidedScope>
          <skipSystemScope>true</skipSystemScope>
          <!-- One aggregate report at the parent level, so the workflow
               has a single SARIF file to upload. -->
          <outputDirectory>${project.build.directory}/dependency-check</outputDirectory>
          <!-- The NVD feed cache lives outside ~/.m2/repository so the
               workflow can cache it under its own (weekly) key without
               overlapping the Maven dependency cache. -->
          <dataDirectory>${user.home}/.cache/dependency-check-data</dataDirectory>
        </configuration>
      </plugin>
    </plugins>
  </build>
</profile>
<profile>
<id>github</id>
<distributionManagement>
Expand Down
Loading