11#!/usr/bin/env python3
22"""
33patch_sbom.py — Post-process a Trivy-generated SPDX JSON file to:
4- 1. Set name to the package name (not the scanned folder name )
4+ 1. Set name to the package name (not the wheel filename )
55 2. Set documentNamespace to the required pattern
66 3. Set creationInfo.creators (Organization + Tool line)
77 4. Set creationInfo.created (current UTC timestamp)
88 5. Set spdxVersion to SPDX-2.3
9- 6. Remove any annotations / comments Trivy adds to packages
10-
11- Usage:
12- python patch_sbom.py \\
13- --input trivy_raw.spdx.json \\
14- --output spotfire.sbom.spdx.json \\
15- --name "spotfire-2.5.0" \\
16- --namespace "https://spotfire.com/spdx/spotfire-2.5.0/2026-03-24T12:00:00Z" \\
17- --org "Cloud Software Group, Inc., Spotfire" \\
18- --tool "trivy-0.69.3"
9+ 6. Remove synthetic Trivy filesystem/source root packages
10+ 7. Remove internal test/noise packages (e.g. setuptools' my-test-package)
11+ 8. Remove the files[] section (internal test eggs, not real deliverables)
12+ 9. Remove annotations / comments Trivy adds to packages
1913"""
2014
2115import argparse
2216import json
17+ import re
2318from datetime import datetime , timezone
2419
2520_STRIP_PKG_FIELDS = {"annotations" , "comment" }
2621
# Packages Trivy picks up from setuptools internals — not real deliverable deps
_NOISE_PACKAGE_NAMES = {
    "my-test-package",
    "my_test_package",
}

# SPDX package purposes that are Trivy synthetic scan-root artefacts
_SYNTHETIC_PURPOSES = {"SOURCE"}


def _normalize_pkg_name(name: str) -> str:
    """Return *name* lower-cased with hyphens folded to underscores."""
    return name.lower().replace("-", "_")


# Normalised once at import time so _is_noise() does not rebuild the set for
# every package it is asked about.
_NOISE_PACKAGE_KEYS = frozenset(
    _normalize_pkg_name(n) for n in _NOISE_PACKAGE_NAMES
)


def _is_synthetic(pkg: dict) -> bool:
    """Return True for Trivy's filesystem scan-root package.

    A package counts as synthetic when its ``primaryPackagePurpose`` is one of
    ``_SYNTHETIC_PURPOSES`` and none of its ``externalRefs`` entries has
    ``referenceType == "purl"``.

    Args:
        pkg: One entry of the SPDX document's ``packages`` array.

    Returns:
        True if the package should be treated as a scan-root artefact,
        False otherwise.
    """
    if pkg.get("primaryPackagePurpose") not in _SYNTHETIC_PURPOSES:
        return False
    # "externalRefs" may be absent or an explicit JSON null; both mean no refs.
    refs = pkg.get("externalRefs") or []
    # Must also have no purl to be sure it's the scan root. Non-dict entries
    # cannot carry a purl, so they are ignored instead of raising.
    return not any(
        isinstance(ref, dict) and ref.get("referenceType") == "purl"
        for ref in refs
    )


def _is_noise(pkg: dict) -> bool:
    """Return True for known internal test packages listed in _NOISE_PACKAGE_NAMES.

    The comparison is case-insensitive and treats ``-`` and ``_`` as the same
    character. A package whose ``name`` is missing or not a string is never
    considered noise.

    Args:
        pkg: One entry of the SPDX document's ``packages`` array.

    Returns:
        True if the package name matches a configured noise name.
    """
    name = pkg.get("name")
    if not isinstance(name, str):
        return False
    return _normalize_pkg_name(name) in _NOISE_PACKAGE_KEYS
47+
2748
2849def patch (input_path : str , output_path : str ,
2950 name : str , namespace : str , org : str , tool : str ) -> None :
@@ -34,7 +55,7 @@ def patch(input_path: str, output_path: str,
3455 # 1. SPDX version
3556 doc ["spdxVersion" ] = "SPDX-2.3"
3657
37- # 2. Document name — replace Trivy's folder name with the real package name
58+ # 2. Document name — clean package name, not wheel filename
3859 doc ["name" ] = name
3960
4061 # 3. Namespace
@@ -49,10 +70,31 @@ def patch(input_path: str, output_path: str,
4970 f"Tool: { tool } " ,
5071 ]
5172
52- # 5. Strip Trivy annotations / comments from every package
73+ # 5. Filter packages — remove synthetic scan-root + noise packages
74+ removed_spdxids = set ()
75+ kept_packages = []
5376 for pkg in doc .get ("packages" , []):
77+ if _is_synthetic (pkg ) or _is_noise (pkg ):
78+ removed_spdxids .add (pkg .get ("SPDXID" ))
79+ continue
80+ # Strip Trivy-added fields
5481 for field in _STRIP_PKG_FIELDS :
5582 pkg .pop (field , None )
83+ kept_packages .append (pkg )
84+ doc ["packages" ] = kept_packages
85+
86+ # 6. Remove files[] section entirely (internal test eggs, not deliverables)
87+ doc .pop ("files" , None )
88+
89+ # 7. Remove relationships that reference removed packages or files
90+ kept_rels = []
91+ for rel in doc .get ("relationships" , []):
92+ if (rel .get ("spdxElementId" ) in removed_spdxids or
93+ rel .get ("relatedSpdxElement" ) in removed_spdxids ):
94+ continue
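95+ # Scan-root DESCRIBES relationships are already dropped by the check above (the root's SPDXID is in removed_spdxids); relationships that reference entries of the removed files[] section are not filtered here.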
96+ kept_rels .append (rel )
97+ doc ["relationships" ] = kept_rels
5698
5799 with open (output_path , "w" , encoding = "utf-8" ) as f :
58100 json .dump (doc , f , indent = 2 , ensure_ascii = False )
@@ -63,14 +105,15 @@ def patch(input_path: str, output_path: str,
63105 print (f" namespace : { namespace } " )
64106 print (f" created : { now } " )
65107 print (f" creators : Organization: { org } | Tool: { tool } " )
66- print (f" packages : { len (doc .get ('packages' , []))} " )
108+ print (f" packages : { len (doc .get ('packages' , []))} kept, { len (removed_spdxids )} removed" )
109+ print (f" files : removed (internal test artefacts)" )
67110
68111
69112def main () -> None :
70113 p = argparse .ArgumentParser (description = __doc__ )
71114 p .add_argument ("--input" , required = True )
72115 p .add_argument ("--output" , required = True )
73- p .add_argument ("--name" , required = True , help = "SPDX document name ( package name + version) " )
116+ p .add_argument ("--name" , required = True , help = "Clean package name e.g. spotfire-2.5.0 " )
74117 p .add_argument ("--namespace" , required = True )
75118 p .add_argument ("--org" , required = True )
76119 p .add_argument ("--tool" , required = True )
0 commit comments