From 0586482f327cb0fbf9c1bb92ede5f78b3307397a Mon Sep 17 00:00:00 2001 From: Mario Leyva Date: Wed, 15 Jan 2025 19:31:01 -0800 Subject: [PATCH] [Layer Scanning] Add symlinks support for the `FileRequirer` logic in `image.FromTarball`; changed the `Layer.Uncompressed` method to return a new `ReaderCloser` every time the method is called. PiperOrigin-RevId: 716044428 --- artifact/image/layerscanning/image/image.go | 181 +++++++++++++----- .../image/layerscanning/image/image_test.go | 104 +++++++++- artifact/image/layerscanning/image/layer.go | 19 +- .../image/layerscanning/image/layer_test.go | 4 +- .../symlinks-across-layers/Dockerfile | 27 +++ artifact/image/whiteout/whiteout.go | 32 +++- artifact/image/whiteout/whiteout_test.go | 116 +++++++++++ 7 files changed, 417 insertions(+), 66 deletions(-) create mode 100644 artifact/image/testfixtures/symlinks-across-layers/Dockerfile diff --git a/artifact/image/layerscanning/image/image.go b/artifact/image/layerscanning/image/image.go index 80664a33..99f8b6c4 100644 --- a/artifact/image/layerscanning/image/image.go +++ b/artifact/image/layerscanning/image/image.go @@ -33,6 +33,7 @@ import ( "github.com/google/go-containerregistry/pkg/v1/tarball" scalibrImage "github.com/google/osv-scalibr/artifact/image" "github.com/google/osv-scalibr/artifact/image/pathtree" + "github.com/google/osv-scalibr/artifact/image/require" "github.com/google/osv-scalibr/artifact/image/symlink" "github.com/google/osv-scalibr/artifact/image/whiteout" "github.com/google/osv-scalibr/log" @@ -50,6 +51,8 @@ var ( ErrFileReadLimitExceeded = errors.New("file exceeds read limit") // ErrSymlinkPointsOutsideRoot is returned when a symlink points outside the root. ErrSymlinkPointsOutsideRoot = errors.New("symlink points outside the root") + // ErrInvalidConfig is returned when the image config is invalid. 
+ ErrInvalidConfig = errors.New("invalid image config") ) // ======================================================== @@ -59,20 +62,33 @@ var ( // Config contains the configuration to load an Image. type Config struct { MaxFileBytes int64 + Requirer require.FileRequirer } // DefaultConfig returns the default configuration to load an Image. func DefaultConfig() *Config { return &Config{ MaxFileBytes: DefaultMaxFileBytes, + // All files are required by default. + Requirer: &require.FileRequirerAll{}, } } +func validateConfig(config *Config) error { + if config.MaxFileBytes <= 0 { + return fmt.Errorf("%w: max file bytes must be positive: %d", ErrInvalidConfig, config.MaxFileBytes) + } + if config.Requirer == nil { + return fmt.Errorf("%w: requirer must be specified", ErrInvalidConfig) + } + return nil +} + // Image is a container image. It is composed of a set of layers that can be scanned for software // inventory. It contains the proper metadata to attribute inventory to layers. type Image struct { chainLayers []*chainLayer - maxFileBytes int64 + config *Config ExtractDir string BaseImageIndex int } @@ -113,11 +129,16 @@ func FromTarball(tarPath string, config *Config) (*Image, error) { // FromV1Image takes a v1.Image and produces a layer-scannable Image. The steps taken are as // follows: // -// (1) Retrieves v1.Layers, configFile. Creates tempPath to store the image files. -// (2) Initializes the output image and the chain layers. -// (3) Unpacks the layers by looping through the layers in reverse, while filling in the files +// (1) Validates the user input image config object. +// (2) Retrieves v1.Layers, configFile. Creates tempPath to store the image files. +// (3) Initializes the output image and the chain layers. +// (4) Unpacks the layers by looping through the layers in reverse, while filling in the files // into the appropriate chain layer. 
func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) { + if err := validateConfig(config); err != nil { + return nil, fmt.Errorf("invalid image config: %w", err) + } + configFile, err := v1Image.ConfigFile() if err != nil { return nil, fmt.Errorf("failed to load config file: %w", err) @@ -145,9 +166,9 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) { outputImage := Image{ chainLayers: chainLayers, + config: config, ExtractDir: tempPath, BaseImageIndex: baseImageIndex, - maxFileBytes: config.MaxFileBytes, } // Add the root directory to each chain layer. If this is not done, then the virtual paths won't @@ -173,44 +194,65 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) { } } - // Reverse loop through the layers to start from the latest layer first. This allows us to skip - // all files already seen. - for i := len(chainLayers) - 1; i >= 0; i-- { - chainLayer := chainLayers[i] + requiredTargets := make(map[string]bool) + for range DefaultMaxSymlinkDepth { + // Reverse loop through the layers to start from the latest layer first. This allows us to skip + // all files already seen. + for i := len(chainLayers) - 1; i >= 0; i-- { + chainLayer := chainLayers[i] - // If the layer is empty, then there is nothing to do. - if chainLayer.latestLayer.IsEmpty() { - continue - } + // If the layer is empty, then there is nothing to do. + if chainLayer.latestLayer.IsEmpty() { + continue + } - originLayerID := chainLayer.latestLayer.DiffID().Encoded() + originLayerID := chainLayer.latestLayer.DiffID().Encoded() - // Create the chain layer directory if it doesn't exist. - // Use filepath here as it is a path that will be written to disk. - dirPath := filepath.Join(tempPath, originLayerID) - if err := os.Mkdir(dirPath, dirPermission); err != nil && !errors.Is(err, fs.ErrExist) { - return &outputImage, fmt.Errorf("failed to create chain layer directory: %w", err) - } + // Create the chain layer directory if it doesn't exist. 
+ // Use filepath here as it is a path that will be written to disk. + dirPath := filepath.Join(tempPath, originLayerID) + if err := os.Mkdir(dirPath, dirPermission); err != nil && !errors.Is(err, fs.ErrExist) { + return &outputImage, fmt.Errorf("failed to create chain layer directory: %w", err) + } - chainLayersToFill := chainLayers[i:] - layerReader, err := chainLayer.latestLayer.Uncompressed() - if err != nil { - return &outputImage, err + chainLayersToFill := chainLayers[i:] + layerReader, err := chainLayer.latestLayer.Uncompressed() + if err != nil { + return &outputImage, err + } + + err = func() error { + // Manually close at the end of the for loop. + defer layerReader.Close() + + tarReader := tar.NewReader(layerReader) + requiredTargets, err = fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets) + if err != nil { + return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err) + } + return nil + }() + + if err != nil { + return &outputImage, err + } } - err = func() error { - // Manually close at the end of the for loop. - defer layerReader.Close() + // If there are no more required targets from symlinks, then there is no need to continue. 
+ if len(requiredTargets) == 0 { + break + } - tarReader := tar.NewReader(layerReader) - if err := fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill); err != nil { - return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err) + stillHaveRequiredTargets := false + for _, isRequired := range requiredTargets { + if isRequired { + stillHaveRequiredTargets = true + break } - return nil - }() + } - if err != nil { - return &outputImage, err + if !stillHaveRequiredTargets { + break } } return &outputImage, nil @@ -282,18 +324,20 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile) ([]*c // fillChainLayerWithFilesFromTar fills the chain layers with the files found in the tar. The // chainLayersToFill are the chain layers that will be filled with the files via the virtual // filesystem. -func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer) error { +func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) { + currentChainLayer := chainLayersToFill[0] + for { header, err := tarReader.Next() if errors.Is(err, io.EOF) { break } if err != nil { - return fmt.Errorf("could not read tar: %w", err) + return nil, fmt.Errorf("could not read tar: %w", err) } - // Some tools prepend everything with "./", so if we don't Clean the - // name, we may have duplicate entries, which angers tar-split. - // Using path instead of filepath to keep `/` and deterministic behavior + // Some tools prepend everything with "./", so if we don't path.Clean the name, we may have + // duplicate entries, which angers tar-split. Using path instead of filepath to keep `/` and + // deterministic behavior. 
cleanedFilePath := path.Clean(filepath.ToSlash(header.Name)) // Prevent "Zip Slip" @@ -301,8 +345,8 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay continue } - // Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR - // and to not depend on internal tar package guess which prefers USTAR over PAX. + // Force PAX format to remove Name/Linkname length limit of 100 characters required by USTAR and + // to not depend on internal tar package guess which prefers USTAR over PAX. header.Format = tar.FormatPAX // There is a difference between the filepath and path modules. The filepath module will handle @@ -325,10 +369,11 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay continue } - tombstone := strings.HasPrefix(basename, whiteout.WhiteoutPrefix) + // Check if the file is a whiteout. + isWhiteout := whiteout.IsWhiteout(basename) // TODO: b/379094217 - Handle Opaque Whiteouts - if tombstone { - basename = basename[len(whiteout.WhiteoutPrefix):] + if isWhiteout { + basename = whiteout.ToPath(basename) } // If we're checking a directory, don't filepath.Join names. @@ -343,14 +388,44 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay // any forward slashes to the appropriate OS specific path separator. realFilePath := filepath.Join(dirPath, filepath.FromSlash(cleanedFilePath)) + // If the file already exists in the current chain layer, then skip it. This is done because + // the tar file could be read multiple times to handle required symlinks. + if currentChainLayer.fileNodeTree.Get(virtualPath) != nil { + continue + } + + // Skip files that are not required by extractors and are not targets of required symlinks. + // Try multiple paths variations + // (with parent dir, without leading slash, with leading slash). For example: + // - `realFilePath`: `tmp/12345/etc/os-release`. This is used when actually writing the file to disk. 
+ // - `cleanedFilePath`: `etc/os-release`. This is used when checking if the file is required. + // - `virtualPath`: `/etc/os-release`. This is used when checking if the file is required. + required := false + for _, p := range []string{realFilePath, cleanedFilePath, virtualPath} { + if requirer.FileRequired(p, header.FileInfo()) { + required = true + break + } + if _, ok := requiredTargets[p]; ok { + required = true + + // The required target has been checked, so it can be marked as not required. + requiredTargets[p] = false + break + } + } + if !required { + continue + } + var newNode *fileNode switch header.Typeflag { case tar.TypeDir: - newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone) + newNode, err = img.handleDir(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout) case tar.TypeReg: - newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone) + newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout) case tar.TypeSymlink, tar.TypeLink: - newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, tombstone) + newNode, err = img.handleSymlink(virtualPath, originLayerID, tarReader, header, isWhiteout, requiredTargets) default: log.Warnf("unsupported file type: %v, path: %s", header.Typeflag, header.Name) continue @@ -361,7 +436,7 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay log.Warnf("failed to handle tar entry with path %s: %w", virtualPath, err) continue } - return fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err) + return nil, fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err) } // In each outer loop, a layer is added to each relevant output chainLayer slice. 
Because the @@ -369,12 +444,12 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay // each chainLayer, as they would have been overwritten. fillChainLayersWithFileNode(chainLayersToFill, newNode) } - return nil + return requiredTargets, nil } // handleSymlink returns the symlink header mode. Symlinks are handled by creating a fileNode with // the symlink mode with additional metadata. -func (img *Image) handleSymlink(realFilePath, virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*fileNode, error) { +func (img *Image) handleSymlink(virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool, requiredTargets map[string]bool) (*fileNode, error) { targetPath := filepath.ToSlash(header.Linkname) if targetPath == "" { return nil, fmt.Errorf("symlink header has no target path") @@ -390,6 +465,8 @@ func (img *Image) handleSymlink(realFilePath, virtualPath, originLayerID string, targetPath = path.Clean(path.Join(path.Dir(virtualPath), targetPath)) } + requiredTargets[targetPath] = true + return &fileNode{ extractDir: img.ExtractDir, originLayerID: originLayerID, @@ -437,8 +514,8 @@ func (img *Image) handleFile(realFilePath, virtualPath, originLayerID string, ta } defer f.Close() - numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.maxFileBytes)) - if numBytes >= img.maxFileBytes || errors.Is(err, io.EOF) { + numBytes, err := io.Copy(f, io.LimitReader(tarReader, img.config.MaxFileBytes)) + if numBytes >= img.config.MaxFileBytes || errors.Is(err, io.EOF) { return nil, ErrFileReadLimitExceeded } diff --git a/artifact/image/layerscanning/image/image_test.go b/artifact/image/layerscanning/image/image_test.go index 970139d1..f6f8463e 100644 --- a/artifact/image/layerscanning/image/image_test.go +++ b/artifact/image/layerscanning/image/image_test.go @@ -27,6 +27,7 @@ import ( v1 "github.com/google/go-containerregistry/pkg/v1" 
"github.com/google/go-containerregistry/pkg/v1/types" "github.com/google/osv-scalibr/artifact/image" + "github.com/google/osv-scalibr/artifact/image/require" ) const testdataDir = "testdata" @@ -132,6 +133,23 @@ func TestFromTarball(t *testing.T) { wantErrDuringImageCreation error wantErrWhileReadingFiles error }{ + { + name: "invalid config - non positive maxFileBytes", + tarPath: filepath.Join(testdataDir, "single-file.tar"), + config: &Config{ + Requirer: &require.FileRequirerAll{}, + MaxFileBytes: 0, + }, + wantErrDuringImageCreation: ErrInvalidConfig, + }, + { + name: "invalid config - missing requirer", + tarPath: filepath.Join(testdataDir, "single-file.tar"), + config: &Config{ + MaxFileBytes: DefaultMaxFileBytes, + }, + wantErrDuringImageCreation: ErrInvalidConfig, + }, { name: "image with one file", tarPath: filepath.Join(testdataDir, "single-file.tar"), @@ -294,6 +312,7 @@ func TestFromTarball(t *testing.T) { tarPath: filepath.Join(testdataDir, "single-file.tar"), config: &Config{ MaxFileBytes: 1, + Requirer: &require.FileRequirerAll{}, }, wantChainLayerEntries: []chainLayerEntries{ { @@ -340,6 +359,60 @@ func TestFromTarball(t *testing.T) { }, }, }, + { + name: "image with required symlink but non-required target path", + tarPath: filepath.Join(testdataDir, "symlink-basic.tar"), + config: &Config{ + MaxFileBytes: DefaultMaxFileBytes, + // dir1/sample.txt is not explicitly required, but should be unpacked because it is the + // target of a required symlink. 
+ Requirer: require.NewFileRequirerPaths([]string{ + "/dir1/absolute-symlink.txt", + }), + }, + wantChainLayerEntries: []chainLayerEntries{ + { + filepathContentPairs: []filepathContentPair{ + { + filepath: "dir1/sample.txt", + content: "sample text\n", + }, + { + filepath: "dir1/absolute-symlink.txt", + content: "sample text\n", + }, + }, + }, + }, + }, + { + name: "image with symlink chain but non-required target path", + tarPath: filepath.Join(testdataDir, "symlink-basic.tar"), + config: &Config{ + MaxFileBytes: DefaultMaxFileBytes, + Requirer: require.NewFileRequirerPaths([]string{ + "/dir1/chain-symlink.txt", + }), + }, + wantChainLayerEntries: []chainLayerEntries{ + { + filepathContentPairs: []filepathContentPair{ + { + filepath: "dir1/sample.txt", + content: "sample text\n", + }, + { + filepath: "dir1/absolute-symlink.txt", + content: "sample text\n", + }, + { + filepath: "dir1/chain-symlink.txt", + content: "sample text\n", + }, + }, + }, + }, + }, { name: "image with symlink cycle", tarPath: filepath.Join(testdataDir, "symlink-cycle.tar"), @@ -427,12 +500,39 @@ func TestFromTarball(t *testing.T) { config: DefaultConfig(), wantErrDuringImageCreation: ErrSymlinkPointsOutsideRoot, }, + { + name: "require single file from images", + tarPath: filepath.Join(testdataDir, "multiple-files.tar"), + config: &Config{ + MaxFileBytes: DefaultMaxFileBytes, + // Only require foo.txt. + Requirer: require.NewFileRequirerPaths([]string{"/foo.txt"}), + }, + wantChainLayerEntries: []chainLayerEntries{ + { + filepathContentPairs: []filepathContentPair{ + { + filepath: "foo.txt", + content: "foo\n", + }, + }, + }, + { + // dir1/bar.txt and dir1/baz.txt are ignored in the second layer. 
+ filepathContentPairs: []filepathContentPair{ + { + filepath: "foo.txt", + content: "foo\n", + }, + }, + }, + }, + }, } for _, tc := range tests { t.Run(tc.name, func(t *testing.T) { gotImage, gotErr := FromTarball(tc.tarPath, tc.config) - defer gotImage.CleanUp() if tc.wantErrDuringImageCreation != nil { if errors.Is(gotErr, tc.wantErrDuringImageCreation) { @@ -444,6 +544,8 @@ func TestFromTarball(t *testing.T) { if gotErr != nil { t.Fatalf("FromTarball(%v) returned unexpected error: %v", tc.tarPath, gotErr) } + // Only defer call to CleanUp if the image was created successfully. + defer gotImage.CleanUp() chainLayers, err := gotImage.ChainLayers() if err != nil { diff --git a/artifact/image/layerscanning/image/layer.go b/artifact/image/layerscanning/image/layer.go index 688ce966..faabc4d4 100644 --- a/artifact/image/layerscanning/image/layer.go +++ b/artifact/image/layerscanning/image/layer.go @@ -49,10 +49,10 @@ var ( // Layer implements the Layer interface. type Layer struct { + v1Layer v1.Layer diffID digest.Digest buildCommand string isEmpty bool - uncompressed io.ReadCloser } // FS returns a scalibr compliant file system. @@ -75,10 +75,15 @@ func (layer *Layer) Command() string { return layer.buildCommand } -// Uncompressed gets the uncompressed ReadCloser which holds all files in the layer. +// Uncompressed returns a new uncompressed ReadCloser from the v1 layer which holds all files in the +// layer. // TODO: b/378938357 - Figure out a better way to get the uncompressed ReadCloser. func (layer *Layer) Uncompressed() (io.ReadCloser, error) { - return layer.uncompressed, nil + uncompressed, err := layer.v1Layer.Uncompressed() + if err != nil { + return nil, fmt.Errorf("%w: %w", ErrUncompressedReaderMissingFromLayer, err) + } + return uncompressed, nil } // convertV1Layer converts a v1.Layer to a scalibr Layer. 
This involves getting the diffID and @@ -89,16 +94,11 @@ func convertV1Layer(v1Layer v1.Layer, command string, isEmpty bool) (*Layer, err return nil, fmt.Errorf("%w: %w", ErrDiffIDMissingFromLayer, err) } - uncompressed, err := v1Layer.Uncompressed() - if err != nil { - return nil, fmt.Errorf("%w: %w", ErrUncompressedReaderMissingFromLayer, err) - } - return &Layer{ + v1Layer: v1Layer, diffID: digest.Digest(diffID.String()), buildCommand: command, isEmpty: isEmpty, - uncompressed: uncompressed, }, nil } @@ -115,7 +115,6 @@ type chainLayer struct { // FS returns a scalibrfs.FS that can be used to scan for inventory. func (chainLayer *chainLayer) FS() scalibrfs.FS { - // root should be "/" given we are dealing with file paths. return &FS{ tree: chainLayer.fileNodeTree, maxSymlinkDepth: DefaultMaxSymlinkDepth, diff --git a/artifact/image/layerscanning/image/layer_test.go b/artifact/image/layerscanning/image/layer_test.go index 2871002a..eb7b87fd 100644 --- a/artifact/image/layerscanning/image/layer_test.go +++ b/artifact/image/layerscanning/image/layer_test.go @@ -48,10 +48,10 @@ func TestConvertV1Layer(t *testing.T) { command: "ADD file", isEmpty: false, wantLayer: &Layer{ + v1Layer: fakev1layer.New("abc123", "ADD file", false, reader), diffID: "sha256:abc123", buildCommand: "ADD file", isEmpty: false, - uncompressed: reader, }, }, { @@ -77,7 +77,7 @@ func TestConvertV1Layer(t *testing.T) { if tc.wantError != nil && gotError == tc.wantError { t.Errorf("convertV1Layer(%v, %v, %v) returned error: %v, want error: %v", tc.v1Layer, tc.command, tc.isEmpty, gotError, tc.wantError) } - if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{})); tc.wantLayer != nil && diff != "" { + if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{})); tc.wantLayer != nil && diff != "" { t.Errorf("convertV1Layer(%v, %v, %v) returned layer: %v, want layer: %v", tc.v1Layer, tc.command, tc.isEmpty, gotLayer, tc.wantLayer) } }) diff 
--git a/artifact/image/testfixtures/symlinks-across-layers/Dockerfile b/artifact/image/testfixtures/symlinks-across-layers/Dockerfile new file mode 100644 index 00000000..2d7dad5d --- /dev/null +++ b/artifact/image/testfixtures/symlinks-across-layers/Dockerfile @@ -0,0 +1,27 @@ +# Use Alpine as the builder since the final image is built on scratch +# which doesn't contain the `ln` command to generate symlinks. +FROM alpine:latest as builder +
+RUN mkdir dir1 +RUN mkdir dir2 +RUN mkdir dir3 +
+
+RUN echo "sample text" > dir1/sample.txt +RUN ln -s /dir1/sample.txt /dir2/absolute-symlink.txt +RUN ln -s /dir2/absolute-symlink.txt /dir3/chain-symlink.txt +
+
+# - root +# - dir1 +# - sample.txt +# - dir2 +# - absolute-symlink.txt -> /dir1/sample.txt +# - dir3 +# - chain-symlink.txt -> /dir2/absolute-symlink.txt +FROM scratch +
+# Must copy over the entire directory to preserve the symlinks. +COPY --from=builder /dir3/ /dir3/ +COPY --from=builder /dir2/ /dir2/ +COPY --from=builder /dir1/ /dir1/ diff --git a/artifact/image/whiteout/whiteout.go b/artifact/image/whiteout/whiteout.go index 9b538f79..e432fe9f 100644 --- a/artifact/image/whiteout/whiteout.go +++ b/artifact/image/whiteout/whiteout.go @@ -19,6 +19,7 @@ package whiteout import ( "fmt" "io/fs" + "path" "path/filepath" "strings" @@ -39,7 +40,7 @@ func Files(scalibrfs scalibrfs.FS) (map[string]struct{}, error) { err := fs.WalkDir(scalibrfs, ".", func(path string, d fs.DirEntry, err error) error { if err != nil { - //nolint:nilerr // continue walking if there is an error + //nolint:nilerr // Continue walking if there is an error. return nil } @@ -60,3 +61,32 @@ func Files(scalibrfs scalibrfs.FS) (map[string]struct{}, error) { } return whiteouts, nil } + +// IsWhiteout returns true if a path is a whiteout path. +func IsWhiteout(p string) bool { + _, file := path.Split(p) + return strings.HasPrefix(file, WhiteoutPrefix) +} + +// ToWhiteout returns the whiteout version of a path. 
+func ToWhiteout(p string) string { + dir, file := path.Split(p) + return path.Join(dir, fmt.Sprintf("%s%s", WhiteoutPrefix, file)) +} + +// ToPath returns the non-whiteout version of a path. +func ToPath(p string) string { + dir, file := path.Split(p) + + if strings.HasPrefix(file, WhiteoutPrefix) { + file = strings.TrimPrefix(file, WhiteoutPrefix) + } + + nonWhiteoutPath := path.Join(dir, file) + + if dir != "" && file == "" { + nonWhiteoutPath = fmt.Sprintf("%s/", nonWhiteoutPath) + } + + return nonWhiteoutPath +} diff --git a/artifact/image/whiteout/whiteout_test.go b/artifact/image/whiteout/whiteout_test.go index e1ce5808..91976bfa 100644 --- a/artifact/image/whiteout/whiteout_test.go +++ b/artifact/image/whiteout/whiteout_test.go @@ -147,3 +147,119 @@ func TestWhiteout(t *testing.T) { }) } } + +func TestIsWhiteout(t *testing.T) { + testCases := []struct { + desc string + path string + want bool + }{ + { + desc: "Empty path", + path: "", + want: false, + }, + { + desc: "Simple file path", + path: "file.txt", + want: false, + }, + { + desc: "Path with directories", + path: "dir1/dir2/foo.txt", + want: false, + }, + { + desc: "Simple whiteout path", + path: ".wh.file.txt", + want: true, + }, + { + desc: "Whiteout path with directories", + path: "dir1/dir2/.wh.foo.txt", + want: true, + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + got := whiteout.IsWhiteout(tc.path) + if got != tc.want { + t.Errorf("IsWhiteout(%q) = %v, want: %v", tc.path, got, tc.want) + } + }) + } +} + +func TestToWhiteout(t *testing.T) { + testCases := []struct { + desc string + path string + want string + }{ + { + desc: "Empty path", + path: "", + want: ".wh.", + }, + { + desc: "Simple file path", + path: "file.txt", + want: ".wh.file.txt", + }, + { + desc: "Path with directories", + path: "dir1/dir2/foo.txt", + want: "dir1/dir2/.wh.foo.txt", + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + got := whiteout.ToWhiteout(tc.path) + if 
got != tc.want { + t.Errorf("ToWhiteout(%q) = %q, want: %q", tc.path, got, tc.want) + } + }) + } +} + +func TestToPath(t *testing.T) { + testCases := []struct { + desc string + path string + want string + }{ + { + desc: "Empty path", + path: "", + want: "", + }, + { + desc: "Simple file path", + path: "file.txt", + want: "file.txt", + }, + { + desc: "Path with directories", + path: "dir1/dir2/foo.txt", + want: "dir1/dir2/foo.txt", + }, + { + desc: "Simple whiteout path", + path: ".wh.file.txt", + want: "file.txt", + }, + { + desc: "Whiteout path with directories", + path: "dir1/dir2/.wh.foo.txt", + want: "dir1/dir2/foo.txt", + }, + } + for _, tc := range testCases { + t.Run(tc.desc, func(t *testing.T) { + got := whiteout.ToPath(tc.path) + if got != tc.want { + t.Errorf("ToPath(%q) = %q, want: %q", tc.path, got, tc.want) + } + }) + } +}