Skip to content

Commit

Permalink
[Layer Scanning] Add symlinks support for the FileRequirer logic in…
Browse files Browse the repository at this point in the history
… `image.FromTarball`; changed the `Layer.Uncompressed` method to return a new `ReaderCloser` every time the method is called.

PiperOrigin-RevId: 725223477
  • Loading branch information
Mario Leyva authored and copybara-github committed Feb 10, 2025
1 parent 126d3e3 commit c591953
Show file tree
Hide file tree
Showing 5 changed files with 182 additions and 50 deletions.
128 changes: 90 additions & 38 deletions artifact/image/layerscanning/image/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ type Config struct {
func DefaultConfig() *Config {
return &Config{
MaxFileBytes: DefaultMaxFileBytes,
Requirer: &require.FileRequirerAll{},
// All files are required by default.
Requirer: &require.FileRequirerAll{},
}
}

Expand Down Expand Up @@ -193,44 +194,65 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
}
}

// Reverse loop through the layers to start from the latest layer first. This allows us to skip
// all files already seen.
for i := len(chainLayers) - 1; i >= 0; i-- {
chainLayer := chainLayers[i]
requiredTargets := make(map[string]bool)
for range DefaultMaxSymlinkDepth {
// Reverse loop through the layers to start from the latest layer first. This allows us to skip
// all files already seen.
for i := len(chainLayers) - 1; i >= 0; i-- {
chainLayer := chainLayers[i]

// If the layer is empty, then there is nothing to do.
if chainLayer.latestLayer.IsEmpty() {
continue
}
// If the layer is empty, then there is nothing to do.
if chainLayer.latestLayer.IsEmpty() {
continue
}

originLayerID := chainLayer.latestLayer.DiffID().Encoded()
originLayerID := chainLayer.latestLayer.DiffID().Encoded()

// Create the chain layer directory if it doesn't exist.
// Use filepath here as it is a path that will be written to disk.
dirPath := filepath.Join(tempPath, originLayerID)
if err := os.Mkdir(dirPath, dirPermission); err != nil && !errors.Is(err, fs.ErrExist) {
return &outputImage, fmt.Errorf("failed to create chain layer directory: %w", err)
}
// Create the chain layer directory if it doesn't exist.
// Use filepath here as it is a path that will be written to disk.
dirPath := filepath.Join(tempPath, originLayerID)
if err := os.Mkdir(dirPath, dirPermission); err != nil && !errors.Is(err, fs.ErrExist) {
return &outputImage, fmt.Errorf("failed to create chain layer directory: %w", err)
}

chainLayersToFill := chainLayers[i:]
layerReader, err := chainLayer.latestLayer.Uncompressed()
if err != nil {
return &outputImage, err
chainLayersToFill := chainLayers[i:]
layerReader, err := chainLayer.latestLayer.Uncompressed()
if err != nil {
return &outputImage, err
}

err = func() error {
// Manually close at the end of the for loop.
defer layerReader.Close()

tarReader := tar.NewReader(layerReader)
requiredTargets, err = fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets)
if err != nil {
return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err)
}
return nil
}()

if err != nil {
return &outputImage, err
}
}

err = func() error {
// Manually close at the end of the for loop.
defer layerReader.Close()
// If there are no more required targets from symlinks, then there is no need to continue.
if len(requiredTargets) == 0 {
break
}

tarReader := tar.NewReader(layerReader)
if err := fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill); err != nil {
return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err)
stillHaveRequiredTargets := false
for _, isRequired := range requiredTargets {
if isRequired {
stillHaveRequiredTargets = true
break
}
return nil
}()
}

if err != nil {
return &outputImage, err
if !stillHaveRequiredTargets {
break
}
}
return &outputImage, nil
Expand Down Expand Up @@ -302,14 +324,16 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile) ([]*c
// fillChainLayerWithFilesFromTar fills the chain layers with the files found in the tar. The
// chainLayersToFill are the chain layers that will be filled with the files via the virtual
// filesystem.
func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer) error {
func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) {
currentChainLayer := chainLayersToFill[0]

for {
header, err := tarReader.Next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return fmt.Errorf("could not read tar: %w", err)
return nil, fmt.Errorf("could not read tar: %w", err)
}
// Some tools prepend everything with "./", so if we don't path.Clean the name, we may have
// duplicate entries, which angers tar-split. Using path instead of filepath to keep `/` and
Expand Down Expand Up @@ -345,6 +369,7 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
continue
}

// Check if the file is a whiteout.
isWhiteout := whiteout.IsWhiteout(basename)
// TODO: b/379094217 - Handle Opaque Whiteouts
if isWhiteout {
Expand All @@ -363,8 +388,33 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
// any forward slashes to the appropriate OS specific path separator.
realFilePath := filepath.Join(dirPath, filepath.FromSlash(cleanedFilePath))

// If the file is not required, then skip it.
if !img.config.Requirer.FileRequired(virtualPath, header.FileInfo()) {
// If the file already exists in the current chain layer, then skip it. This is done because
// the tar file could be read multiple times to handle required symlinks.
if currentChainLayer.fileNodeTree.Get(virtualPath) != nil {
continue
}

// Skip files that are not required by extractors and are not targets of required symlinks.
// Try multiple paths variations
// (with parent dir, without leading slash, with leading slash). For example:
// - `realFilePath`: `tmp/12345/etc/os-release`. This is used when actually writing the file to disk.
// - `cleanedFilePath`: `etc/os-release`. This is used when checking if the file is required.
// - `virtualPath`: `/etc/os-release`. This is used when checking if the file is required.
required := false
for _, p := range []string{realFilePath, cleanedFilePath, virtualPath} {
if requirer.FileRequired(p, header.FileInfo()) {
required = true
break
}
if _, ok := requiredTargets[p]; ok {
required = true

// The required target has been checked, so it can be marked as not required.
requiredTargets[p] = false
break
}
}
if !required {
continue
}

Expand All @@ -375,7 +425,7 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
case tar.TypeReg:
newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
case tar.TypeSymlink, tar.TypeLink:
newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
newNode, err = img.handleSymlink(virtualPath, originLayerID, tarReader, header, isWhiteout, requiredTargets)
default:
log.Warnf("unsupported file type: %v, path: %s", header.Typeflag, header.Name)
continue
Expand All @@ -386,20 +436,20 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
log.Warnf("failed to handle tar entry with path %s: %w", virtualPath, err)
continue
}
return fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err)
return nil, fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err)
}

// In each outer loop, a layer is added to each relevant output chainLayer slice. Because the
// outer loop is looping backwards (latest layer first), we ignore any files that are already in
// each chainLayer, as they would have been overwritten.
fillChainLayersWithFileNode(chainLayersToFill, newNode)
}
return nil
return requiredTargets, nil
}

// handleSymlink returns the symlink header mode. Symlinks are handled by creating a fileNode with
// the symlink mode with additional metadata.
func (img *Image) handleSymlink(realFilePath, virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*fileNode, error) {
func (img *Image) handleSymlink(virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool, requiredTargets map[string]bool) (*fileNode, error) {
targetPath := filepath.ToSlash(header.Linkname)
if targetPath == "" {
return nil, fmt.Errorf("symlink header has no target path")
Expand All @@ -415,6 +465,8 @@ func (img *Image) handleSymlink(realFilePath, virtualPath, originLayerID string,
targetPath = path.Clean(path.Join(path.Dir(virtualPath), targetPath))
}

requiredTargets[targetPath] = true

return &fileNode{
extractDir: img.ExtractDir,
originLayerID: originLayerID,
Expand Down
54 changes: 54 additions & 0 deletions artifact/image/layerscanning/image/image_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,60 @@ func TestFromTarball(t *testing.T) {
},
},
},
{
name: "image with required symlink but non-required target path",
tarPath: filepath.Join(testdataDir, "symlink-basic.tar"),
config: &Config{
MaxFileBytes: DefaultMaxFileBytes,
// dir1/sample.txt is not explicitly required, but should be unpacked because it is the
// target of a required symlink.
Requirer: require.NewFileRequirerPaths([]string{
"/dir1/absolute-symlink.txt",
}),
},
wantChainLayerEntries: []chainLayerEntries{
{
filepathContentPairs: []filepathContentPair{
{
filepath: "dir1/sample.txt",
content: "sample text\n",
},
{
filepath: "dir1/absolute-symlink.txt",
content: "sample text\n",
},
},
},
},
},
{
name: "image with symlink chain but non-required target path",
tarPath: filepath.Join(testdataDir, "symlink-basic.tar"),
config: &Config{
MaxFileBytes: DefaultMaxFileBytes,
Requirer: require.NewFileRequirerPaths([]string{
"/dir1/chain-symlink.txt",
}),
},
wantChainLayerEntries: []chainLayerEntries{
{
filepathContentPairs: []filepathContentPair{
{
filepath: "dir1/sample.txt",
content: "sample text\n",
},
{
filepath: "dir1/absolute-symlink.txt",
content: "sample text\n",
},
{
filepath: "dir1/chain-symlink.txt",
content: "sample text\n",
},
},
},
},
},
{
name: "image with symlink cycle",
tarPath: filepath.Join(testdataDir, "symlink-cycle.tar"),
Expand Down
19 changes: 9 additions & 10 deletions artifact/image/layerscanning/image/layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ var (

// Layer implements the Layer interface.
type Layer struct {
v1Layer v1.Layer
diffID digest.Digest
buildCommand string
isEmpty bool
uncompressed io.ReadCloser
}

// FS returns a scalibr compliant file system.
Expand All @@ -75,10 +75,15 @@ func (layer *Layer) Command() string {
return layer.buildCommand
}

// Uncompressed gets the uncompressed ReadCloser which holds all files in the layer.
// Uncompressed returns a new uncompressed ReadCloser from the v1 layer which holds all files in the
// layer.
// TODO: b/378938357 - Figure out a better way to get the uncompressed ReadCloser.
func (layer *Layer) Uncompressed() (io.ReadCloser, error) {
return layer.uncompressed, nil
uncompressed, err := layer.v1Layer.Uncompressed()
if err != nil {
return nil, fmt.Errorf("%w: %w", ErrUncompressedReaderMissingFromLayer, err)
}
return uncompressed, nil
}

// convertV1Layer converts a v1.Layer to a scalibr Layer. This involves getting the diffID and
Expand All @@ -89,16 +94,11 @@ func convertV1Layer(v1Layer v1.Layer, command string, isEmpty bool) (*Layer, err
return nil, fmt.Errorf("%w: %w", ErrDiffIDMissingFromLayer, err)
}

uncompressed, err := v1Layer.Uncompressed()
if err != nil {
return nil, fmt.Errorf("%w: %w", ErrUncompressedReaderMissingFromLayer, err)
}

return &Layer{
v1Layer: v1Layer,
diffID: digest.Digest(diffID.String()),
buildCommand: command,
isEmpty: isEmpty,
uncompressed: uncompressed,
}, nil
}

Expand All @@ -115,7 +115,6 @@ type chainLayer struct {

// FS returns a scalibrfs.FS that can be used to scan for inventory.
func (chainLayer *chainLayer) FS() scalibrfs.FS {
// root should be "/" given we are dealing with file paths.
return &FS{
tree: chainLayer.fileNodeTree,
maxSymlinkDepth: DefaultMaxSymlinkDepth,
Expand Down
4 changes: 2 additions & 2 deletions artifact/image/layerscanning/image/layer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ func TestConvertV1Layer(t *testing.T) {
command: "ADD file",
isEmpty: false,
wantLayer: &Layer{
v1Layer: fakev1layer.New("abc123", "ADD file", false, reader),
diffID: "sha256:abc123",
buildCommand: "ADD file",
isEmpty: false,
uncompressed: reader,
},
},
{
Expand All @@ -77,7 +77,7 @@ func TestConvertV1Layer(t *testing.T) {
if tc.wantError != nil && gotError == tc.wantError {
t.Errorf("convertV1Layer(%v, %v, %v) returned error: %v, want error: %v", tc.v1Layer, tc.command, tc.isEmpty, gotError, tc.wantError)
}
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{})); tc.wantLayer != nil && diff != "" {
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{})); tc.wantLayer != nil && diff != "" {
t.Errorf("convertV1Layer(%v, %v, %v) returned layer: %v, want layer: %v", tc.v1Layer, tc.command, tc.isEmpty, gotLayer, tc.wantLayer)
}
})
Expand Down
27 changes: 27 additions & 0 deletions artifact/image/testfixtures/symlinks-across-layers/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use Alpine as the builder since the final image is built on scratch
# which doesn't contain the `ln` command to generate symlinks.
FROM alpine:latest as builder

RUN mkdir dir1
RUN mkdir dir2
RUN mkdir


RUN echo "sample text" > dir1/sample.txt
RUN ln -s /dir1/sample.txt /dir2/absolute-symlink.txt
RUN ln -s /dir2/absolute-symlink.txt /dir3/chain-symlink.txt


# - root
# - dir1
# - sample.txt
# - dir2
# - absolute-symlink.txt -> /dir1/sample.txt
# - dir3
# - chain-symlink.txt -> /dir2absolute-symlink.txt
FROM scratch

# Must copy over the entire directory to preserve the symlinks.
COPY --from=builder /dir3/ /dir3/
COPY --from=builder /dir2/ /dir2/
COPY --from=builder /dir1/ /dir1/

0 comments on commit c591953

Please sign in to comment.