Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Optimization] Changed the layer scanning tracing logic to reduce the number of ToPURL calls, since they can be expensive. #485

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 33 additions & 28 deletions artifact/image/layerscanning/trace/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,10 @@ import (
"context"
"errors"
"io/fs"
"slices"
"sort"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem"
"github.com/google/osv-scalibr/log"

scalibrImage "github.com/google/osv-scalibr/artifact/image"
scalibrfs "github.com/google/osv-scalibr/fs"
Expand Down Expand Up @@ -54,6 +53,13 @@ type locationAndIndex struct {
// Note that a precondition of this algorithm is that the chain layers are ordered by order of
// creation.
func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
// If there are no chain layers, then there is nothing to trace. This should not happen, but we
// should handle it gracefully.
if len(chainLayers) == 0 {
log.Warnf("No chain layers found, cannot trace inventory.")
return
}

chainLayerDetailsList := []*extractor.LayerDetails{}

// Create list of layer details struct to be referenced by inventory.
Expand Down Expand Up @@ -100,6 +106,11 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
continue
}

var invPURL string
if inv.Extractor != nil {
invPURL = inv.Extractor.ToPURL(inv).String()
}

var foundOrigin bool

// Go backwards through the chain layers and find the first layer where the inventory is not
Expand Down Expand Up @@ -142,10 +153,22 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,

foundPackage := false
for _, oldInv := range oldInventory {
if areInventoriesEqual(inv, oldInv) {
foundPackage = true
break
if oldInv.Extractor == nil {
continue
}

// PURLs are being used as a package key, so if they are different, skip this inventory.
oldInvPURL := oldInv.Extractor.ToPURL(oldInv).String()
if oldInvPURL != invPURL {
continue
}

if !areLocationsEqual(oldInv.Locations, inv.Locations) {
continue
}

foundPackage = true
break
}

// If the inventory is not present in the old layer, then it was introduced in layer i+1.
Expand All @@ -165,30 +188,12 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
}
}

// areInventoriesEqual checks if two inventories are equal. It does this by comparing the PURLs and
// the locations of the inventories.
func areInventoriesEqual(inv1 *extractor.Inventory, inv2 *extractor.Inventory) bool {
if inv1.Extractor == nil || inv2.Extractor == nil {
return false
}

// Check if the PURLs are equal.
purl1 := inv1.Extractor.ToPURL(inv1)
purl2 := inv2.Extractor.ToPURL(inv2)

if purl1.String() != purl2.String() {
// areLocationsEqual checks if the inventory location strings are equal.
func areLocationsEqual(fileLocations []string, otherFileLocations []string) bool {
if len(fileLocations) == 0 || len(otherFileLocations) == 0 {
log.Warnf("Empty file locations found, cannot compare locations")
return false
}

// Check if the locations are equal.
locations1 := inv1.Locations[:]
sort.Strings(locations1)

locations2 := inv2.Locations[:]
sort.Strings(locations2)

if !slices.Equal(locations1, locations2) {
return false
}
return true
return fileLocations[0] == otherFileLocations[0]
}
185 changes: 36 additions & 149 deletions artifact/image/layerscanning/trace/trace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,42 @@ func TestPopulateLayerDetails(t *testing.T) {
chainLayers: []image.ChainLayer{},
wantInventory: []*extractor.Inventory{},
},
{
name: "empty chain layers",
inventory: []*extractor.Inventory{
{
Name: fooPackage,
Locations: []string{fooFile},
Extractor: fakeExtractor1,
},
},
chainLayers: []image.ChainLayer{},
wantInventory: []*extractor.Inventory{
{
Name: fooPackage,
Locations: []string{fooFile},
Extractor: fakeExtractor1,
},
},
},
{
name: "inventory with nil extractor",
inventory: []*extractor.Inventory{
{
Name: fooPackage,
Locations: []string{fooFile},
},
},
chainLayers: []image.ChainLayer{
fakeChainLayer1,
},
wantInventory: []*extractor.Inventory{
{
Name: fooPackage,
Locations: []string{fooFile},
},
},
},
{
name: "inventory in single chain layer",
inventory: []*extractor.Inventory{
Expand Down Expand Up @@ -329,152 +365,3 @@ func TestPopulateLayerDetails(t *testing.T) {
})
}
}

func TestAreInventoriesEqual(t *testing.T) {
tests := []struct {
name string
inv1 *extractor.Inventory
inv2 *extractor.Inventory
want bool
}{
{
name: "nil extractor",
inv1: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: nil,
},
inv2: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: nil,
},
want: false,
},
{
name: "same inventory",
inv1: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: true,
},
{
name: "same inventory with multiple locations",
inv1: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt", "another-foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"another-foo.txt", "foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: true,
},
{
name: "different name",
inv1: &extractor.Inventory{
Name: "foo",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "bar",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: false,
},
{
name: "different version",
inv1: &extractor.Inventory{
Name: "foo",
Version: "1.0",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "foo",
Version: "2.0",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: false,
},
{
name: "different locations",
inv1: &extractor.Inventory{
Name: "foo",
Locations: []string{"foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
inv2: &extractor.Inventory{
Name: "foo",
Locations: []string{"another-foo.txt"},
Extractor: fakeextractor.New("fake-extractor", 1, []string{"foo.txt"}, map[string]fakeextractor.NamesErr{
"foo.txt": fakeextractor.NamesErr{
Names: []string{"foo"},
},
}),
},
want: false,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
if got := areInventoriesEqual(tc.inv1, tc.inv2); got != tc.want {
t.Errorf("areInventoriesEqual(%v, %v) = %v, want: %v", tc.inv1, tc.inv2, got, tc.want)
}
})
}
}
Loading