Skip to content

Commit 7515cd2

Browse files
authored
Resolve and load dependencies from SPDX SBOMs #1145 (#1827)
Signed-off-by: tdruez <tdruez@nexb.com>
1 parent 08e4e75 commit 7515cd2

15 files changed

+677
-46
lines changed

CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,12 @@
11
Changelog
22
=========
33

4+
v35.4.0 (unreleased)
5+
--------------------
6+
7+
- Resolve and load dependencies from SPDX SBOMs.
8+
https://github.com/aboutcode-org/scancode.io/issues/1145
9+
410
v35.3.0 (2025-08-20)
511
--------------------
612

scanpipe/pipelines/load_sbom.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/aboutcode-org/scancode.io for support and download.
2222

23+
from scanpipe.models import DiscoveredDependency
2324
from scanpipe.pipelines.scan_codebase import ScanCodebase
2425
from scanpipe.pipes import resolve
2526

@@ -44,7 +45,7 @@ def steps(cls):
4445
cls.flag_empty_files,
4546
cls.flag_ignored_resources,
4647
cls.get_sbom_inputs,
47-
cls.get_packages_from_sboms,
48+
cls.get_data_from_sboms,
4849
cls.create_packages_from_sboms,
4950
cls.create_dependencies_from_sboms,
5051
)
@@ -53,13 +54,13 @@ def get_sbom_inputs(self):
5354
"""Locate all the SBOMs among the codebase resources."""
5455
self.manifest_resources = resolve.get_manifest_resources(self.project)
5556

56-
def get_packages_from_sboms(self):
57-
"""Get packages data from SBOMs."""
58-
self.packages = resolve.get_packages(
57+
def get_data_from_sboms(self):
58+
"""Get data from SBOMs."""
59+
self.packages, self.dependencies = resolve.get_data_from_manifests(
5960
project=self.project,
6061
package_registry=resolve.sbom_registry,
6162
manifest_resources=self.manifest_resources,
62-
model="get_packages_from_sboms",
63+
model="get_data_from_sboms",
6364
)
6465

6566
def create_packages_from_sboms(self):
@@ -71,4 +72,12 @@ def create_packages_from_sboms(self):
7172

7273
def create_dependencies_from_sboms(self):
7374
"""Create the dependency relationship declared in the SBOMs."""
75+
# CycloneDX support: the dependency data is stored in ``extra_data``.
7476
resolve.create_dependencies_from_packages_extra_data(project=self.project)
77+
78+
# SPDX support: the dependency data is loaded from ``self.dependencies``.
79+
for dependency_data in self.dependencies:
80+
DiscoveredDependency.create_from_data(
81+
project=self.project,
82+
dependency_data=dependency_data,
83+
)

scanpipe/pipelines/resolve_dependencies.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def get_packages_from_manifest(self):
8484
Resolve package data from lockfiles/requirement files with package
8585
requirements/dependencies.
8686
"""
87-
self.resolved_packages = resolve.get_packages(
87+
self.packages, self.dependencies = resolve.get_data_from_manifests(
8888
project=self.project,
8989
package_registry=resolve.resolver_registry,
9090
manifest_resources=self.manifest_resources,
@@ -99,6 +99,6 @@ def create_resolved_packages(self):
9999
"""
100100
resolve.create_packages_and_dependencies(
101101
project=self.project,
102-
packages=self.resolved_packages,
102+
packages=self.packages,
103103
resolved=True,
104104
)

scanpipe/pipes/cyclonedx.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,9 @@ def get_external_references(component):
7979

8080
references = defaultdict(list)
8181
for reference in external_references:
82-
references[reference.type.value].append(reference.url.uri)
82+
reference_url = reference.url
83+
if reference_url and reference_url.uri:
84+
references[reference.type.value].append(reference_url.uri)
8385

8486
return dict(references)
8587

@@ -158,12 +160,9 @@ def cyclonedx_component_to_package_data(
158160
vulnerabilities = vulnerabilities or {}
159161
extra_data = {}
160162

161-
# Store the original bom_ref and dependencies for future processing.
162163
bom_ref = str(cdx_component.bom_ref)
163-
if bom_ref:
164-
extra_data["bom_ref"] = bom_ref
165-
if depends_on := dependencies.get(bom_ref):
166-
extra_data["depends_on"] = depends_on
164+
if depends_on := dependencies.get(bom_ref):
165+
extra_data["depends_on"] = depends_on
167166

168167
package_url_dict = {}
169168
if cdx_component.purl:
@@ -189,6 +188,8 @@ def cyclonedx_component_to_package_data(
189188
)
190189

191190
package_data = {
191+
# Store the original "bom_ref" as package_uid for dependencies resolution.
192+
"package_uid": bom_ref,
192193
"name": cdx_component.name,
193194
"extracted_license_statement": declared_license,
194195
"copyright": cdx_component.copyright,

scanpipe/pipes/output.py

Lines changed: 30 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -671,6 +671,28 @@ def _get_spdx_extracted_licenses(license_expressions):
671671
return extracted_licenses
672672

673673

674+
def get_dependency_as_spdx_relationship(dependency, document_spdx_id, packages_as_spdx):
675+
"""Return a spdx.Relationship crafted from the provided ``dependency`` instance."""
676+
if dependency.for_package: # Package dependency
677+
parent_id = dependency.for_package.spdx_id
678+
else: # Project dependency
679+
parent_id = document_spdx_id
680+
681+
if dependency.is_resolved_to_package: # Resolved to a Package
682+
child_id = dependency.resolved_to_package.spdx_id
683+
else: # Not resolved to a Package (only package_url value is available)
684+
dependency_as_package = dependency.as_spdx_package()
685+
packages_as_spdx.append(dependency_as_package)
686+
child_id = dependency_as_package.spdx_id
687+
688+
spdx_relationship = spdx.Relationship(
689+
spdx_id=child_id,
690+
related_spdx_id=parent_id,
691+
relationship="DEPENDENCY_OF",
692+
)
693+
return spdx_relationship
694+
695+
674696
def to_spdx(project, include_files=False):
675697
"""
676698
Generate output for the provided ``project`` in SPDX document format.
@@ -682,6 +704,7 @@ def to_spdx(project, include_files=False):
682704
discoveredpackage_qs = get_queryset(project, "discoveredpackage")
683705
discovereddependency_qs = get_queryset(project, "discovereddependency")
684706

707+
document_spdx_id = f"SPDXRef-DOCUMENT-{project.uuid}"
685708
packages_as_spdx = []
686709
license_expressions = []
687710
relationships = []
@@ -692,15 +715,12 @@ def to_spdx(project, include_files=False):
692715
license_expressions.append(license_expression)
693716

694717
for dependency in discovereddependency_qs:
695-
packages_as_spdx.append(dependency.as_spdx_package())
696-
if dependency.for_package:
697-
relationships.append(
698-
spdx.Relationship(
699-
spdx_id=dependency.spdx_id,
700-
related_spdx_id=dependency.for_package.spdx_id,
701-
relationship="DEPENDENCY_OF",
702-
)
703-
)
718+
spdx_relationship = get_dependency_as_spdx_relationship(
719+
dependency,
720+
document_spdx_id,
721+
packages_as_spdx,
722+
)
723+
relationships.append(spdx_relationship)
704724

705725
files_as_spdx = []
706726
if include_files:
@@ -710,6 +730,7 @@ def to_spdx(project, include_files=False):
710730
]
711731

712732
document = spdx.Document(
733+
spdx_id=document_spdx_id,
713734
name=f"scancodeio_{project.name}",
714735
namespace=f"https://scancode.io/spdxdocs/{project.uuid}",
715736
creation_info=spdx.CreationInfo(tool=f"ScanCode.io-{scancodeio_version}"),

scanpipe/pipes/resolve.py

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,33 @@ def resolve_manifest_resources(resource, package_registry):
6060
return packages
6161

6262

63-
def get_packages(project, package_registry, manifest_resources, model=None):
63+
def get_dependencies_from_manifest(resource):
6464
"""
65-
Get package data from package manifests/lockfiles/SBOMs or
66-
get package data for resolved packages from package requirements.
65+
Get dependency data from resource.
66+
This is used for SPDX where the dependency data is stored as its own
67+
entry in the SBOM.
68+
On the CycloneDX side, the dependency data is stored inline in the
69+
component entries; it is stored on the package ``extra_data``.
70+
"""
71+
dependencies = []
72+
73+
default_package_type = get_default_package_type(resource.location)
74+
if not default_package_type:
75+
return []
76+
77+
if default_package_type == "spdx":
78+
dependencies = resolve_spdx_dependencies(input_location=resource.location)
79+
80+
return dependencies
81+
82+
83+
def get_data_from_manifests(project, package_registry, manifest_resources, model=None):
84+
"""
85+
Get package and dependency data from package manifests/lockfiles/SBOMs or
86+
get that data for resolved packages from package requirements.
6787
"""
6888
resolved_packages = []
89+
resolved_dependencies = []
6990
sboms_headers = {}
7091

7192
if not manifest_resources.exists():
@@ -76,7 +97,8 @@ def get_packages(project, package_registry, manifest_resources, model=None):
7697
return []
7798

7899
for resource in manifest_resources:
79-
if packages := resolve_manifest_resources(resource, package_registry):
100+
packages = resolve_manifest_resources(resource, package_registry)
101+
if packages:
80102
resolved_packages.extend(packages)
81103
if headers := get_manifest_headers(resource):
82104
sboms_headers[resource.name] = headers
@@ -87,10 +109,14 @@ def get_packages(project, package_registry, manifest_resources, model=None):
87109
object_instance=resource,
88110
)
89111

112+
dependencies = get_dependencies_from_manifest(resource)
113+
if dependencies:
114+
resolved_dependencies.extend(dependencies)
115+
90116
if sboms_headers:
91117
project.update_extra_data({"sboms_headers": sboms_headers})
92118

93-
return resolved_packages
119+
return resolved_packages, resolved_dependencies
94120

95121

96122
def create_packages_and_dependencies(project, packages, resolved=False):
@@ -139,7 +165,7 @@ def create_dependencies_from_packages_extra_data(project):
139165

140166
for bom_ref in for_package.extra_data.get("depends_on", []):
141167
try:
142-
resolved_to_package = project_packages.get(extra_data__bom_ref=bom_ref)
168+
resolved_to_package = project_packages.get(package_uid=bom_ref)
143169
except (ObjectDoesNotExist, MultipleObjectsReturned):
144170
project.add_error(
145171
description=f"Could not find resolved_to package entry: {bom_ref}.",
@@ -284,8 +310,12 @@ def convert_spdx_expression(license_expression_spdx):
284310
return get_license_detections_and_expression(license_expression_spdx)[1]
285311

286312

287-
def spdx_package_to_discovered_package_data(spdx_package):
313+
def spdx_package_to_package_data(spdx_package):
314+
"""Convert the provided spdx_package into package_data."""
288315
package_url_dict = {}
316+
# Store the original "SPDXID" as package_uid for dependencies resolution.
317+
package_uid = spdx_package.spdx_id
318+
289319
for ref in spdx_package.external_refs:
290320
if ref.type == "purl":
291321
purl = ref.locator
@@ -302,6 +332,7 @@ def spdx_package_to_discovered_package_data(spdx_package):
302332
declared_expression = convert_spdx_expression(declared_license_expression_spdx)
303333

304334
package_data = {
335+
"package_uid": package_uid,
305336
"name": spdx_package.name,
306337
"download_url": spdx_package.download_location,
307338
"declared_license_expression": declared_expression,
@@ -324,8 +355,28 @@ def spdx_package_to_discovered_package_data(spdx_package):
324355
}
325356

326357

327-
def resolve_spdx_packages(input_location):
328-
"""Resolve the packages from the `input_location` SPDX document file."""
358+
def spdx_relationship_to_dependency_data(spdx_relationship):
359+
"""Convert the provided spdx_relationship into dependency_data."""
360+
# spdx_id is a dependency of related_spdx_id
361+
if spdx_relationship.is_dependency_relationship:
362+
for_package_uid = spdx_relationship.related_spdx_id
363+
resolve_to_package_uid = spdx_relationship.spdx_id
364+
else: # spdx_id depends on related_spdx_id
365+
for_package_uid = spdx_relationship.spdx_id
366+
resolve_to_package_uid = spdx_relationship.related_spdx_id
367+
368+
dependency_data = {
369+
"for_package_uid": for_package_uid,
370+
"resolve_to_package_uid": resolve_to_package_uid,
371+
"is_runtime": True,
372+
"is_resolved": True,
373+
"is_direct": True,
374+
}
375+
return dependency_data
376+
377+
378+
def get_spdx_document_from_file(input_location):
379+
"""Return the loaded SPDX document from the `input_location` file."""
329380
input_path = Path(input_location)
330381
spdx_document = json.loads(input_path.read_text())
331382

@@ -334,12 +385,32 @@ def resolve_spdx_packages(input_location):
334385
except Exception as e:
335386
raise Exception(f'SPDX document "{input_path.name}" is not valid: {e}')
336387

388+
return spdx_document
389+
390+
391+
def resolve_spdx_packages(input_location):
392+
"""Resolve the packages from the `input_location` SPDX document file."""
393+
spdx_document = get_spdx_document_from_file(input_location)
337394
return [
338-
spdx_package_to_discovered_package_data(spdx.Package.from_data(spdx_package))
395+
spdx_package_to_package_data(spdx.Package.from_data(spdx_package))
339396
for spdx_package in spdx_document.get("packages", [])
340397
]
341398

342399

400+
def resolve_spdx_dependencies(input_location):
401+
"""Resolve the dependencies from the `input_location` SPDX document file."""
402+
spdx_document = get_spdx_document_from_file(input_location)
403+
spdx_relationships = [
404+
spdx.Relationship.from_data(spdx_relationship)
405+
for spdx_relationship in spdx_document.get("relationships", [])
406+
]
407+
408+
return [
409+
spdx_relationship_to_dependency_data(spdx_relationship)
410+
for spdx_relationship in spdx_relationships
411+
]
412+
413+
343414
def get_default_package_type(input_location):
344415
"""
345416
Return the package type associated with the provided `input_location`.

scanpipe/pipes/spdx.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,18 @@ def from_data(cls, data):
520520
comment=data.get("comment"),
521521
)
522522

523+
@property
524+
def is_dependency_relationship(self):
525+
"""
526+
Return True if this relationship type implies that the spdx_id element
527+
is a dependency of related_spdx_id.
528+
"""
529+
reverse_dependency_types = ["ANCESTOR_OF", "CONTAINS", "DEPENDS_ON"]
530+
# Every other type implies that the spdx_id element is a dependency of
531+
# related_spdx_id. Such as:
532+
# "DEPENDENCY_OF", "DESCENDANT_OF", "PACKAGE_OF", "CONTAINED_BY", ...
533+
return self.relationship.upper() not in reverse_dependency_types
534+
523535

524536
@dataclass
525537
class Document:

scanpipe/tests/data/asgiref/asgiref-3.3.0.spdx.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"spdxVersion": "SPDX-2.3",
33
"dataLicense": "CC0-1.0",
4-
"SPDXID": "SPDXRef-DOCUMENT",
4+
"SPDXID": "SPDXRef-DOCUMENT-804c3391-e6f9-415f-bb7a-cb6653853a46",
55
"name": "scancodeio_asgiref",
66
"documentNamespace": "https://scancode.io/spdxdocs/804c3391-e6f9-415f-bb7a-cb6653853a46",
77
"creationInfo": {

scanpipe/tests/data/cyclonedx/nested.cdx.json

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,17 @@
8989
"content": "806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"
9090
}
9191
]
92+
},
93+
{
94+
"type": "build-meta",
95+
"url": "",
96+
"comment": "Missing URL",
97+
"hashes": [
98+
{
99+
"alg": "SHA-1",
100+
"content": "568f3f90c3d6aced58de033a3547ccd2e4e088e8"
101+
}
102+
]
92103
}
93104
],
94105
"licenses": [

0 commit comments

Comments
 (0)