Skip to content

Commit a4ee8a4

Browse files
rpbeltran authored and copybara-github committed
Make POM.xml transitive dependency extractions a graph
PiperOrigin-RevId: 921774138
1 parent bdb1ecb commit a4ee8a4

16 files changed

Lines changed: 674 additions & 58 deletions

File tree

binary/proto/inventory_test.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ func TestInventoryToProto(t *testing.T) {
116116
opts = []cmp.Option{
117117
protocmp.Transform(),
118118
cmpopts.IgnoreFields(extractor.LayerMetadata{}, "ParentContainer"),
119+
cmpopts.IgnoreFields(extractor.Package{}, "ParentIDs"),
120+
cmpopts.IgnoreFields(extractor.Package{}, "ID"),
119121
}
120122
if diff := cmp.Diff(tc.inv, gotInv, opts...); diff != "" {
121123
t.Errorf("InventoryToStruct(%v) returned diff (-want +got):\n%s", gotInv, diff)
@@ -253,6 +255,8 @@ func TestInventoryToStruct(t *testing.T) {
253255
opts := []cmp.Option{
254256
protocmp.Transform(),
255257
cmpopts.IgnoreFields(extractor.LayerMetadata{}, "ParentContainer"),
258+
cmpopts.IgnoreFields(extractor.Package{}, "ParentIDs"),
259+
cmpopts.IgnoreFields(extractor.Package{}, "ID"),
256260
cmpopts.EquateEmpty(),
257261
}
258262
if diff := cmp.Diff(tc.want, got, opts...); diff != "" {
@@ -304,8 +308,10 @@ func TestInventoryToStructInvalidPkgVuln(t *testing.T) {
304308
PackageVulns: []*pb.PackageVuln{{PackageId: "pkg"}},
305309
},
306310
want: &inventory.Inventory{
307-
Packages: []*extractor.Package{{Name: "pkg1"}, {Name: "pkg2"}},
308-
PackageVulns: []*inventory.PackageVuln{{Package: &extractor.Package{Name: "pkg1"}}},
311+
Packages: []*extractor.Package{
312+
{Name: "pkg1", ID: "pkg"},
313+
{Name: "pkg2", ID: "pkg"}},
314+
PackageVulns: []*inventory.PackageVuln{{Package: &extractor.Package{Name: "pkg1", ID: "pkg"}}},
309315
},
310316
},
311317
{
@@ -320,7 +326,7 @@ func TestInventoryToStructInvalidPkgVuln(t *testing.T) {
320326
for _, tc := range testCases {
321327
t.Run(tc.desc, func(t *testing.T) {
322328
got := proto.InventoryToStruct(tc.inv)
323-
if diff := cmp.Diff(tc.want, got, cmpopts.IgnoreFields(extractor.LayerMetadata{}, "ParentContainer"), protocmp.Transform()); diff != "" {
329+
if diff := cmp.Diff(tc.want, got, cmpopts.IgnoreFields(extractor.LayerMetadata{}, "ParentContainer"), cmpopts.IgnoreFields(extractor.Package{}, "ParentIDs"), protocmp.Transform()); diff != "" {
324330
t.Fatalf("InventoryToStruct(%v) returned diff (-want +got):\n%s", tc.inv, diff)
325331
}
326332
})

binary/proto/package.go

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package proto
1616

1717
import (
1818
"fmt"
19+
"sort"
1920

2021
"github.com/google/osv-scalibr/binary/proto/metadata"
2122
"github.com/google/osv-scalibr/converter"
@@ -25,7 +26,6 @@ import (
2526
"github.com/google/osv-scalibr/log"
2627
"github.com/google/osv-scalibr/purl"
2728
"github.com/google/osv-scalibr/purl/purlproto"
28-
"github.com/google/uuid"
2929
"google.golang.org/protobuf/reflect/protoreflect"
3030

3131
spb "github.com/google/osv-scalibr/binary/proto/scan_result_go_proto"
@@ -65,13 +65,20 @@ func PackageToProto(pkg *extractor.Package) (*spb.Package, error) {
6565
}
6666
}
6767

68-
id, err := uuid.NewRandom()
68+
id, err := pkg.GetIDOrGenerate(&extractor.RandomIDGenerator{})
6969
if err != nil {
70-
return nil, fmt.Errorf("failed to generate UUID for %q package %q version %q: %w", pkg.Ecosystem().String(), pkg.Name, pkg.Version, err)
70+
return nil, err
71+
}
72+
73+
var parentIDs []string
74+
for parent := range pkg.ParentIDs {
75+
parentIDs = append(parentIDs, parent)
7176
}
77+
sort.Strings(parentIDs)
7278

7379
packageProto := &spb.Package{
74-
Id: id.String(),
80+
Id: id,
81+
ParentIds: parentIDs,
7582
Name: pkg.Name,
7683
Version: pkg.Version,
7784
SourceCode: sourceCodeIdentifierToProto(pkg.SourceCode),
@@ -194,8 +201,16 @@ func PackageToStruct(pkgProto *spb.Package) (*extractor.Package, error) {
194201
if err != nil {
195202
return nil, err
196203
}
204+
205+
parentIDs := make(map[string]bool)
206+
for _, id := range pkgProto.GetParentIds() {
207+
parentIDs[id] = true
208+
}
209+
197210
pkg := &extractor.Package{
198211
Name: pkgProto.GetName(),
212+
ID: pkgProto.GetId(),
213+
ParentIDs: parentIDs,
199214
Version: pkgProto.GetVersion(),
200215
SourceCode: sourceCodeIdentifierToStruct(pkgProto.GetSourceCode()),
201216
Location: packageLocationToStruct(pkgProto.GetLocation()),

binary/proto/package_test.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"github.com/google/go-cmp/cmp"
2222
"github.com/google/osv-scalibr/binary/proto"
2323
"github.com/google/osv-scalibr/extractor"
24+
"github.com/google/osv-scalibr/purl"
2425
"google.golang.org/protobuf/testing/protocmp"
2526

2627
spb "github.com/google/osv-scalibr/binary/proto/scan_result_go_proto"
@@ -33,6 +34,34 @@ var (
3334
}
3435
)
3536

37+
func exampleTransitiveDependencyPackage() *extractor.Package {
38+
return &extractor.Package{
39+
Name: "transitive_package",
40+
ParentIDs: map[string]bool{"parent_1": true, "parent_2": true},
41+
Version: "1.0.0",
42+
PURLType: purl.TypePyPi,
43+
Location: extractor.LocationFromPath("/file1"),
44+
Plugins: []string{"extractor_name"},
45+
}
46+
}
47+
48+
func exampleTransitiveDependencyPackageProto() *spb.Package {
49+
return &spb.Package{
50+
Name: "transitive_package",
51+
Version: "1.0.0",
52+
Ecosystem: "PyPI",
53+
Location: pkgLocProtoFromPath("/file1"),
54+
Plugins: []string{"extractor_name"},
55+
ParentIds: []string{"parent_1", "parent_2"},
56+
Purl: &spb.Purl{
57+
Purl: "pkg:pypi/transitive-package@1.0.0",
58+
Type: "pypi",
59+
Name: "transitive-package",
60+
Version: "1.0.0",
61+
},
62+
}
63+
}
64+
3665
func TestPackageToProto(t *testing.T) {
3766
testCases := []struct {
3867
desc string
@@ -50,6 +79,11 @@ func TestPackageToProto(t *testing.T) {
5079
pkg: PurlDPKGAnnotationPackage(),
5180
want: PurlDPKGAnnotationPackageProto(t),
5281
},
82+
{
83+
desc: "transitive_dependency",
84+
pkg: exampleTransitiveDependencyPackage(),
85+
want: exampleTransitiveDependencyPackageProto(),
86+
},
5387
}
5488

5589
for _, tc := range testCases {
@@ -107,6 +141,11 @@ func TestPackageToStruct(t *testing.T) {
107141
pkg: PurlDPKGAnnotationPackageProto(t),
108142
want: PurlDPKGAnnotationPackage(),
109143
},
144+
{
145+
desc: "transitive_dependency",
146+
pkg: exampleTransitiveDependencyPackageProto(),
147+
want: exampleTransitiveDependencyPackage(),
148+
},
110149
}
111150

112151
for _, tc := range testCases {

binary/proto/result_test.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ func PurlDPKGAnnotationPackage() *extractor.Package {
9292
Justification: vex.ComponentNotPresent,
9393
MatchesAllVulns: true,
9494
}},
95+
ParentIDs: map[string]bool{},
9596
}
9697
}
9798

@@ -1668,6 +1669,10 @@ func TestScanResultToProtoAndBack(t *testing.T) {
16681669
}
16691670

16701671
gotInv := proto.InventoryToStruct(invProto)
1672+
// Ignore package ID fields because they are randomly generated.
1673+
for _, pkg := range gotInv.Packages {
1674+
pkg.ID = ""
1675+
}
16711676
if diff := cmp.Diff(tc.res.Inventory, *gotInv, opts...); diff != "" {
16721677
t.Errorf("proto.InventoryToStruct(%v) returned unexpected diff (-want +got):\n%s", invProto, diff)
16731678
}

binary/proto/scan_result.proto

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,9 @@ message Package {
170170
}
171171

172172
optional ContainerImageMetadataIndexes container_image_metadata_indexes = 57;
173+
174+
// The IDs of the parent packages, if this package is a transitive dependency.
175+
repeated string parent_ids = 70;
173176
}
174177

175178
// Paths or source of files related to an extracted package.

binary/proto/scan_result_go_proto/scan_result.pb.go

Lines changed: 12 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

enricher/transitivedependency/internal/grouping.go

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
package internal
1717

1818
import (
19+
"fmt"
1920
"slices"
2021

22+
"deps.dev/util/resolve"
2123
"github.com/google/osv-scalibr/extractor"
2224
"github.com/google/osv-scalibr/inventory"
2325
"github.com/google/osv-scalibr/log"
@@ -56,13 +58,69 @@ func Add(enrichedPkgs []*extractor.Package, inv *inventory.Inventory, pluginName
5658
for _, pkg := range enrichedPkgs {
5759
indexPkg, ok := existingPackages[pkg.Name]
5860
if ok {
59-
// This dependency is in manifest, update the version and plugins.
61+
// This dependency is in manifest, update the version, plugins and parent IDs.
6062
i := indexPkg.Index
6163
inv.Packages[i].Version = pkg.Version
6264
inv.Packages[i].Plugins = append(inv.Packages[i].Plugins, pluginName)
65+
66+
if len(pkg.ParentIDs) > 0 && inv.Packages[i].ParentIDs == nil {
67+
inv.Packages[i].ParentIDs = make(map[string]bool)
68+
}
69+
70+
for parentID := range pkg.ParentIDs {
71+
inv.Packages[i].ParentIDs[parentID] = true
72+
}
6373
} else {
6474
// This dependency is not found in manifest, so it's a transitive dependency.
6575
inv.Packages = append(inv.Packages, pkg)
6676
}
6777
}
6878
}
79+
80+
// GetNameToIDMapping returns a mapping of package name to package ID for a given list of packages
81+
// and a dependency graph. Known packages without IDs will have IDs added using the ID generator.
82+
func GetNameToIDMapping(g *resolve.Graph, packages []*extractor.Package, idGenerator extractor.IDGenerator) (map[string]string, error) {
83+
nameToID := make(map[string]string)
84+
for _, pkg := range packages {
85+
id, err := pkg.RequireID(idGenerator)
86+
if err != nil {
87+
return nil, err
88+
}
89+
nameToID[pkg.Name] = id
90+
}
91+
92+
for i := 1; i < len(g.Nodes); i++ {
93+
node := g.Nodes[i]
94+
if _, ok := nameToID[node.Version.Name]; !ok {
95+
id, err := idGenerator.GenerateID(node.Version.Name)
96+
if err != nil {
97+
return nil, fmt.Errorf("failed to generate random UUID: %w", err)
98+
}
99+
nameToID[node.Version.Name] = id
100+
}
101+
}
102+
return nameToID, nil
103+
}
104+
105+
// GetParentIDs returns the set of parent IDs for a node in a dependency graph.
106+
func GetParentIDs(g *resolve.Graph, nameToID map[string]string, nodeID resolve.NodeID) (map[string]bool, error) {
107+
parents := make(map[string]bool)
108+
for _, edge := range g.Edges {
109+
if edge.To == nodeID {
110+
if int(edge.From) >= len(g.Nodes) {
111+
return nil, fmt.Errorf("parent id %v is out of range for nodes (length %v)", edge.From, len(g.Nodes))
112+
}
113+
if edge.From == 0 {
114+
parents["root"] = true
115+
continue
116+
}
117+
parentPkgName := g.Nodes[edge.From].Version.Name
118+
parentPkgID, ok := nameToID[parentPkgName]
119+
if !ok {
120+
return nil, fmt.Errorf("parent package %q not found in known packages", parentPkgName)
121+
}
122+
parents[parentPkgID] = true
123+
}
124+
}
125+
return parents, nil
126+
}

0 commit comments

Comments
 (0)