Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Optimization] Optimize package tracing by only calling SCALIBR on inventories whose file location is found in the underlying layer of a chain layer (the layer only has files found in the layer, whereas the chain layer has files found in all previous layers). #474

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions artifact/image/layerscanning/image/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
defer layerReader.Close()

tarReader := tar.NewReader(layerReader)
requiredTargets, err = fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets)
requiredTargets, err = fillChainLayersWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets)
if err != nil {
return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err)
}
Expand Down Expand Up @@ -304,6 +304,7 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile, maxSy
latestLayer: &Layer{
buildCommand: entry.CreatedBy,
isEmpty: true,
fileNodeTree: pathtree.NewNode[fileNode](),
},
maxSymlinkDepth: maxSymlinkDepth,
})
Expand Down Expand Up @@ -353,10 +354,10 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile, maxSy
return chainLayers, nil
}

// fillChainLayerWithFilesFromTar fills the chain layers with the files found in the tar. The
// fillChainLayersWithFilesFromTar fills the chain layers with the files found in the tar. The
// chainLayersToFill are the chain layers that will be filled with the files via the virtual
// filesystem.
func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) {
func fillChainLayersWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) {
currentChainLayer := chainLayersToFill[0]

for {
Expand Down Expand Up @@ -477,6 +478,10 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
// outer loop is looping backwards (latest layer first), we ignore any files that are already in
// each chainLayer, as they would have been overwritten.
fillChainLayersWithFileNode(chainLayersToFill, newNode)

// Add the fileNode to the node tree of the underlying layer.
layer := currentChainLayer.latestLayer.(*Layer)
layer.fileNodeTree.Insert(virtualPath, newNode)
}
return requiredTargets, nil
}
Expand Down
6 changes: 5 additions & 1 deletion artifact/image/layerscanning/image/layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,14 @@ type Layer struct {
diffID digest.Digest
buildCommand string
isEmpty bool
fileNodeTree *pathtree.Node[fileNode]
}

// FS returns a scalibr compliant file system.
func (layer *Layer) FS() scalibrfs.FS {
return nil
return &FS{
tree: layer.fileNodeTree,
}
}

// IsEmpty returns whether the layer is empty.
Expand Down Expand Up @@ -96,6 +99,7 @@ func convertV1Layer(v1Layer v1.Layer, command string, isEmpty bool) (*Layer, err
diffID: digest.Digest(diffID.String()),
buildCommand: command,
isEmpty: isEmpty,
fileNodeTree: pathtree.NewNode[fileNode](),
}, nil
}

Expand Down
3 changes: 2 additions & 1 deletion artifact/image/layerscanning/image/layer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func TestConvertV1Layer(t *testing.T) {
diffID: "sha256:abc123",
buildCommand: "ADD file",
isEmpty: false,
fileNodeTree: pathtree.NewNode[fileNode](),
},
},
{
Expand All @@ -77,7 +78,7 @@ func TestConvertV1Layer(t *testing.T) {
if tc.wantError != nil && gotError == tc.wantError {
t.Errorf("convertV1Layer(%v, %v, %v) returned error: %v, want error: %v", tc.v1Layer, tc.command, tc.isEmpty, gotError, tc.wantError)
}
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{})); tc.wantLayer != nil && diff != "" {
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{}, pathtree.Node[fileNode]{})); tc.wantLayer != nil && diff != "" {
t.Errorf("convertV1Layer(%v, %v, %v) returned layer: %v, want layer: %v", tc.v1Layer, tc.command, tc.isEmpty, gotLayer, tc.wantLayer)
}
})
Expand Down
151 changes: 98 additions & 53 deletions artifact/image/layerscanning/trace/trace.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ package trace

import (
"context"
"errors"
"io/fs"
"fmt"
"slices"
"sort"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem"
"github.com/google/osv-scalibr/log"

scalibrImage "github.com/google/osv-scalibr/artifact/image"
scalibrfs "github.com/google/osv-scalibr/fs"
Expand Down Expand Up @@ -54,23 +54,11 @@ type locationAndIndex struct {
// Note that a precondition of this algorithm is that the chain layers are ordered by order of
// creation.
func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory, chainLayers []scalibrImage.ChainLayer, config *filesystem.Config) {
chainLayerDetailsList := []*extractor.LayerDetails{}

// Create list of layer details struct to be referenced by inventory.
for i, chainLayer := range chainLayers {
var diffID string
if chainLayer.Layer().IsEmpty() {
diffID = ""
} else {
diffID = chainLayer.Layer().DiffID().Encoded()
}

chainLayerDetailsList = append(chainLayerDetailsList, &extractor.LayerDetails{
Index: i,
DiffID: diffID,
Command: chainLayer.Layer().Command(),
InBaseImage: false,
})
layerDetailsList := createLayerDetailsList(chainLayers)
layerFSList, err := createLayerFSList(chainLayers)
if err != nil {
log.Errorf("Failed to create layer FS list: %v", err)
return
}

// Helper function to update the extractor config.
Expand All @@ -90,7 +78,7 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
lastLayerIndex := len(chainLayers) - 1

for _, inv := range inventory {
layerDetails := chainLayerDetailsList[lastLayerIndex]
layerDetails := layerDetailsList[lastLayerIndex]
invExtractor, isFilesystemExtractor := inv.Extractor.(filesystem.Extractor)

// Only filesystem extractors are supported for layer scanning. Also, if the inventory has no
Expand All @@ -100,66 +88,65 @@ func PopulateLayerDetails(ctx context.Context, inventory []*extractor.Inventory,
continue
}

fileLocation := inv.Locations[0]
layersWithFileIndices := findLayersWithFile(fileLocation, layerFSList)

var foundOrigin bool

// Go backwards through the chain layers and find the first layer where the inventory is not
// present. Such layer is the layer in which the inventory was introduced. If the inventory is
// present in all layers, then it means it was introduced in the first layer.
// TODO: b/381249869 - Optimization: Skip layers if file not found.
for i := len(chainLayers) - 2; i >= 0; i-- {
oldChainLayer := chainLayers[i]
// Iterate through the layer indices that contain the file in reverse order. The first layer
// that contains the inventory is the layer origin layer of the inventory.
for _, i := range layersWithFileIndices {
chainLayer := chainLayers[i]

invLocationAndIndex := locationAndIndex{
location: inv.Locations[0],
location: fileLocation,
index: i,
}

var oldInventory []*extractor.Inventory
var inventory []*extractor.Inventory
if cachedInventory, ok := locationIndexToInventory[invLocationAndIndex]; ok {
oldInventory = cachedInventory
inventory = cachedInventory
} else {
// Check if file still exist in this layer, if not skip extraction.
// Check if file still exists in this layer, if not skip extraction.
// This is both an optimization, and avoids polluting the log output with false file not found errors.
if _, err := oldChainLayer.FS().Stat(inv.Locations[0]); errors.Is(err, fs.ErrNotExist) {
oldInventory = []*extractor.Inventory{}
} else {
// Update the extractor config to use the files from the current layer.
// We only take extract the first location because other locations are derived from the initial
// extraction location. If other locations can no longer be determined from the first location
// they should not be included here, and the trace for those packages stops here.
updateExtractorConfig([]string{inv.Locations[0]}, invExtractor, oldChainLayer.FS())

var err error
oldInventory, _, err = filesystem.Run(ctx, config)
if err != nil {
break
}
if _, err := chainLayer.FS().Stat(fileLocation); err != nil {
continue
}
// Update the extractor config to use the files from the current layer.
// We only extract the first location because other locations are derived from the initial
// extraction location. If other locations can no longer be determined from the first location
// they should not be included here, and the trace for those packages stops here.
updateExtractorConfig([]string{fileLocation}, invExtractor, chainLayer.FS())

var err error
inventory, _, err = filesystem.Run(ctx, config)
if err != nil {
break
}

// Cache the inventory for future use.
locationIndexToInventory[invLocationAndIndex] = oldInventory
locationIndexToInventory[invLocationAndIndex] = inventory
}

foundPackage := false
for _, oldInv := range oldInventory {
if areInventoriesEqual(inv, oldInv) {
for _, otherInv := range inventory {
if areInventoriesEqual(inv, otherInv) {
foundPackage = true
break
}
}

// If the inventory is not present in the old layer, then it was introduced in layer i+1.
if !foundPackage {
layerDetails = chainLayerDetailsList[i+1]
// If the inventory was found, then the current layer is the origin layer.
if foundPackage {
layerDetails = layerDetailsList[i]
foundOrigin = true
break
}
}

// If the inventory is present in every layer, then it means it was introduced in the first
// layer.
if !foundOrigin {
layerDetails = chainLayerDetailsList[0]
log.Infof("Inventory %v not found in any chain layer", inv)
continue
}
inv.LayerDetails = layerDetails
}
Expand Down Expand Up @@ -192,3 +179,61 @@ func areInventoriesEqual(inv1 *extractor.Inventory, inv2 *extractor.Inventory) b
}
return true
}

// getSingleLayerFSFromChainLayer returns the filesystem of the underlying layer in the chain layer.
func getLayerFSFromChainLayer(chainLayer scalibrImage.ChainLayer) (scalibrfs.FS, error) {
layer := chainLayer.Layer()
if layer == nil {
return nil, fmt.Errorf("chain layer has no layer")
}

fs := layer.FS()
if fs == nil {
return nil, fmt.Errorf("layer has no filesystem")
}

return fs, nil
}

func createLayerDetailsList(chainLayers []scalibrImage.ChainLayer) []*extractor.LayerDetails {
layerDetailsList := []*extractor.LayerDetails{}
for i, chainLayer := range chainLayers {
var diffID string
if chainLayer.Layer().IsEmpty() {
diffID = ""
} else {
diffID = chainLayer.Layer().DiffID().Encoded()
}
layerDetailsList = append(layerDetailsList, &extractor.LayerDetails{
Index: i,
DiffID: diffID,
Command: chainLayer.Layer().Command(),
InBaseImage: false,
})
}
return layerDetailsList
}

func createLayerFSList(chainLayers []scalibrImage.ChainLayer) ([]scalibrfs.FS, error) {
layerFSList := []scalibrfs.FS{}
for _, chainLayer := range chainLayers {
layerFS, err := getLayerFSFromChainLayer(chainLayer)
if err != nil {
return nil, err
}
layerFSList = append(layerFSList, layerFS)
}
return layerFSList, nil
}

func findLayersWithFile(fileLocation string, layerFSList []scalibrfs.FS) []int {
layersIndices := []int{}
for i := len(layerFSList) - 1; i >= 0; i-- {
layerFS := layerFSList[i]
if _, err := layerFS.Stat(fileLocation); err != nil {
continue
}
layersIndices = append(layersIndices, i)
}
return layersIndices
}
Loading