Skip to content

Commit 401116e

Browse files
committed
Fix: clean SBOM output - strip synthetic pkgs, test files, noise; fix name/namespace from wheel tags
1 parent 90f4b78 commit 401116e

2 files changed

Lines changed: 62 additions & 16 deletions

File tree

.github/scripts/patch_sbom.py

Lines changed: 58 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,50 @@
11
#!/usr/bin/env python3
22
"""
33
patch_sbom.py — Post-process a Trivy-generated SPDX JSON file to:
4-
1. Set name to the package name (not the scanned folder name)
4+
1. Set name to the package name (not the wheel filename)
55
2. Set documentNamespace to the required pattern
66
3. Set creationInfo.creators (Organization + Tool line)
77
4. Set creationInfo.created (current UTC timestamp)
88
5. Set spdxVersion to SPDX-2.3
9-
6. Remove any annotations / comments Trivy adds to packages
10-
11-
Usage:
12-
python patch_sbom.py \\
13-
--input trivy_raw.spdx.json \\
14-
--output spotfire.sbom.spdx.json \\
15-
--name "spotfire-2.5.0" \\
16-
--namespace "https://spotfire.com/spdx/spotfire-2.5.0/2026-03-24T12:00:00Z" \\
17-
--org "Cloud Software Group, Inc., Spotfire" \\
18-
--tool "trivy-0.69.3"
9+
6. Remove synthetic Trivy filesystem/source root packages
10+
7. Remove internal test/noise packages (e.g. setuptools' my-test-package)
11+
8. Remove the files[] section (internal test eggs, not real deliverables)
12+
9. Remove annotations / comments Trivy adds to packages
1913
"""
2014

2115
import argparse
2216
import json
17+
import re
2318
from datetime import datetime, timezone
2419

2520
_STRIP_PKG_FIELDS = {"annotations", "comment"}
2621

22+
# Packages Trivy picks up from setuptools internals — not real deliverable deps
_NOISE_PACKAGE_NAMES = {
    "my-test-package",
    "my_test_package",
}

# SPDX package purposes that are Trivy synthetic scan-root artefacts
_SYNTHETIC_PURPOSES = {"SOURCE"}


def _normalize_pkg_name(name: object) -> str:
    """Return *name* lower-cased with hyphens folded to underscores.

    Non-string values (e.g. a JSON ``null``) normalize to the empty string so
    callers never have to special-case a missing or malformed name.
    """
    if not isinstance(name, str):
        return ""
    return name.lower().replace("-", "_")


# Normalized once at import time so _is_noise() does not rebuild the set on
# every call; uses the same normalization as the package name being tested.
_NOISE_KEYS = frozenset(_normalize_pkg_name(n) for n in _NOISE_PACKAGE_NAMES)


def _is_synthetic(pkg: dict) -> bool:
    """Return True for Trivy's filesystem scan-root package.

    A package counts as synthetic when its ``primaryPackagePurpose`` is listed
    in ``_SYNTHETIC_PURPOSES`` and none of its ``externalRefs`` entries has
    ``referenceType == "purl"``. A missing or ``null`` ``externalRefs`` is
    treated as an empty list, and entries that are not JSON objects are
    ignored.
    """
    if pkg.get("primaryPackagePurpose") not in _SYNTHETIC_PURPOSES:
        return False
    # Must also have no purl to be sure it's the scan root
    refs = pkg.get("externalRefs") or []
    return not any(
        isinstance(ref, dict) and ref.get("referenceType") == "purl"
        for ref in refs
    )


def _is_noise(pkg: dict) -> bool:
    """Return True for known internal test packages bundled inside setuptools.

    The package ``name`` is compared case-insensitively, with ``-`` and ``_``
    treated as equivalent, against ``_NOISE_PACKAGE_NAMES``. A missing or
    non-string name is never considered noise.
    """
    return _normalize_pkg_name(pkg.get("name")) in _NOISE_KEYS
47+
2748

2849
def patch(input_path: str, output_path: str,
2950
name: str, namespace: str, org: str, tool: str) -> None:
@@ -34,7 +55,7 @@ def patch(input_path: str, output_path: str,
3455
# 1. SPDX version
3556
doc["spdxVersion"] = "SPDX-2.3"
3657

37-
# 2. Document name — replace Trivy's folder name with the real package name
58+
# 2. Document name — clean package name, not wheel filename
3859
doc["name"] = name
3960

4061
# 3. Namespace
@@ -49,10 +70,31 @@ def patch(input_path: str, output_path: str,
4970
f"Tool: {tool}",
5071
]
5172

52-
# 5. Strip Trivy annotations / comments from every package
73+
# 5. Filter packages — remove synthetic scan-root + noise packages
74+
removed_spdxids = set()
75+
kept_packages = []
5376
for pkg in doc.get("packages", []):
77+
if _is_synthetic(pkg) or _is_noise(pkg):
78+
removed_spdxids.add(pkg.get("SPDXID"))
79+
continue
80+
# Strip Trivy-added fields
5481
for field in _STRIP_PKG_FIELDS:
5582
pkg.pop(field, None)
83+
kept_packages.append(pkg)
84+
doc["packages"] = kept_packages
85+
86+
# 6. Remove files[] section entirely (internal test eggs, not deliverables)
87+
doc.pop("files", None)
88+
89+
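# 7. Remove relationships that reference removed packages (NOTE: SPDXIDs of the files dropped in step 6 are not collected, so relationships pointing at files are kept)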
90+
kept_rels = []
91+
for rel in doc.get("relationships", []):
92+
if (rel.get("spdxElementId") in removed_spdxids or
93+
rel.get("relatedSpdxElement") in removed_spdxids):
94+
continue
95+
# DESCRIBES relationships pointing at the old scan-root are already dropped by the check above, because the scan-root's SPDXID is in removed_spdxids
96+
kept_rels.append(rel)
97+
doc["relationships"] = kept_rels
5698

5799
with open(output_path, "w", encoding="utf-8") as f:
58100
json.dump(doc, f, indent=2, ensure_ascii=False)
@@ -63,14 +105,15 @@ def patch(input_path: str, output_path: str,
63105
print(f" namespace : {namespace}")
64106
print(f" created : {now}")
65107
print(f" creators : Organization: {org} | Tool: {tool}")
66-
print(f" packages : {len(doc.get('packages', []))}")
108+
print(f" packages : {len(doc.get('packages', []))} kept, {len(removed_spdxids)} removed")
109+
print(f" files : removed (internal test artefacts)")
67110

68111

69112
def main() -> None:
70113
p = argparse.ArgumentParser(description=__doc__)
71114
p.add_argument("--input", required=True)
72115
p.add_argument("--output", required=True)
73-
p.add_argument("--name", required=True, help="SPDX document name (package name + version)")
116+
p.add_argument("--name", required=True, help="Clean package name e.g. spotfire-2.5.0")
74117
p.add_argument("--namespace", required=True)
75118
p.add_argument("--org", required=True)
76119
p.add_argument("--tool", required=True)

.github/workflows/sbom.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ jobs:
8787
- name: Set SBOM metadata
8888
id: meta
8989
run: |
90+
# Extract just "spotfire-2.5.0.dev0" from "spotfire-2.5.0.dev0.tar.gz"
9091
PKG_NAME=$(ls dist/spotfire-*.tar.gz | sed 's|dist/||;s|\.tar\.gz||')
9192
CREATED=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
9293
echo "pkg_name=$PKG_NAME" >> $GITHUB_OUTPUT
@@ -172,7 +173,9 @@ jobs:
172173
- name: Set SBOM metadata
173174
id: meta
174175
run: |
175-
PKG_NAME=$(ls dist/spotfire-*.whl | sed 's|dist/||;s|\.whl||')
176+
# Extract just "spotfire-2.5.0.dev0" from the full wheel filename
177+
# wheel format: {name}-{version}[-{build}]-{python}-{abi}-{platform}.whl (name and version never contain '-', so fields 1-2 are name-version)
178+
PKG_NAME=$(ls dist/spotfire-*.whl | sed 's|dist/||;s|\.whl||' | cut -d- -f1,2)
176179
CREATED=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
177180
echo "pkg_name=$PKG_NAME" >> $GITHUB_OUTPUT
178181
echo "namespace=${{ env.SBOM_NS_BASE }}/$PKG_NAME/$CREATED" >> $GITHUB_OUTPUT

0 commit comments

Comments
 (0)