From ec34abb1060de76debe7cfebc03244f02c2c3e3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Virtus?= Date: Thu, 15 Jun 2023 01:57:58 +0200 Subject: [PATCH] extract: Add callbacks This commit adds support for callbacks from deb/extract.go. These callbacks will be used by the Chisel DB implementation that will follow to record metadata about installed paths. The reason for adding callbacks instead of handling path recording in deb/extract is to keep the concerns separated between internal packages. In Chisel DB we need to record paths to slices relationship and digests of regular files. But deb/extract doesn't know about slices. slicer on the other hand knows about slices, and so it's the proper place to drive Chisel DB creation. But slicer needs to cooperate with deb/extract because it doesn't know which paths will be matched by globs and because it needs the content of matched regular files. The callbacks provide a mechanism for slicer to accomplish this. --- internal/deb/extract.go | 101 +++++++++++++--- internal/deb/extract_test.go | 228 +++++++++++++++++++++++++++++++++++ 2 files changed, 312 insertions(+), 17 deletions(-) diff --git a/internal/deb/extract.go b/internal/deb/extract.go index 98dba0b3..27f644f5 100644 --- a/internal/deb/extract.go +++ b/internal/deb/extract.go @@ -21,11 +21,56 @@ import ( "github.com/canonical/chisel/internal/strdist" ) +type ConsumeData func(reader io.Reader) error + +// DataCallback function is called when a tarball entry of a regular file is +// going to be extracted. +// +// The source and size parameters are set to the file's path in the tarball and +// the size of its content, respectively. When the returned ConsumeData function +// is not nil, it is called with the file's content. +// +// When the source path occurs in the tarball more than once, this function will +// be called for each occurrence. +// +// If either DataCallback or ConsumeData function returns a non-nil error, the +// Extract() function will fail with that error. +type DataCallback func(source string, size int64) (ConsumeData, error) + +// The CreateCallback function is called when a tarball entry is going to be +// extracted to a target path. +// +// The target, link, and mode parameters are set to the target's path, symbolic +// link target, and mode, respectively. The target's filesystem type can be +// determined from these parameters. If the link is not empty, the target is a +// symbolic link. Otherwise, if the target's path ends with /, it is a +// directory. Otherwise, it is a regular file. +// +// When the source parameter is not empty, the target is going to be extracted +// from a tarball entry with the source path. The function may be called more +// than once with the same source when the tarball entry is extracted to +// multiple different targets. +// +// Otherwise, the mode is 0755 and the target is going to be an implicitly +// created parent directory of another target, and when a directory entry with +// that path is later encountered in the tarball with a different mode, the +// function will be called again with the same target, source equal to the +// target, and the different mode. +// +// When the source path occurs in the tarball more than once, this function will +// be called for each target path for each occurrence. +// +// If CreateCallback function returns a non-nil error, the Extract() function +// will fail with that error. +type CreateCallback func(source, target, link string, mode fs.FileMode) error + type ExtractOptions struct { Package string TargetDir string Extract map[string][]ExtractInfo Globbed map[string][]string + OnData DataCallback + OnCreate CreateCallback } type ExtractInfo struct { @@ -209,14 +254,21 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { return nil } info := dirInfos[dir] + source := dir if info.created { return nil } else if info.mode == 0 { info.mode = fs.ModeDir | 0755 + source = "" } if err := createParents(dir); err != nil { return err } + if options.OnCreate != nil { + if err := options.OnCreate(source, dir, "", info.mode); err != nil { + return err + } + } create := fsutil.CreateOptions{ Path: filepath.Join(options.TargetDir, dir), Mode: info.mode, @@ -229,27 +281,37 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { return nil } - var contentCache []byte - var contentIsCached = len(extractInfos) > 1 && sourceMode.IsRegular() && globPath == "" - if contentIsCached { - // Read and cache the content so it may be reused. - // As an alternative, to avoid having an entire file in - // memory at once this logic might open the first file - // written and copy it every time. For now, the choice - // is speed over memory efficiency. - data, err := io.ReadAll(tarReader) - if err != nil { - return err + getReader := func() io.Reader { return tarReader } + + if sourceMode.IsRegular() { + var consumeData ConsumeData + if options.OnData != nil { + var err error + if consumeData, err = options.OnData(sourcePath, tarHeader.Size); err != nil { + return err + } + } + if consumeData != nil || (len(extractInfos) > 1 && globPath == "") { + // Read and cache the content so it may be reused. + // As an alternative, to avoid having an entire file in + // memory at once this logic might open the first file + // written and copy it every time. For now, the choice + // is speed over memory efficiency. + data, err := io.ReadAll(tarReader) + if err != nil { + return err + } + getReader = func() io.Reader { return bytes.NewReader(data) } + } + if consumeData != nil { + if err := consumeData(getReader()); err != nil { + return err + } } - contentCache = data } - var pathReader io.Reader = tarReader origMode := tarHeader.Mode for _, extractInfo := range extractInfos { - if contentIsCached { - pathReader = bytes.NewReader(contentCache) - } var targetPath string if globPath == "" { targetPath = extractInfo.Path @@ -264,10 +326,15 @@ func extractData(dataReader io.Reader, options *ExtractOptions) error { tarHeader.Mode = int64(extractInfo.Mode) } fsMode := tarHeader.FileInfo().Mode() + if options.OnCreate != nil { + if err := options.OnCreate(sourcePath, targetPath, tarHeader.Linkname, fsMode); err != nil { + return err + } + } err := fsutil.Create(&fsutil.CreateOptions{ Path: filepath.Join(options.TargetDir, targetPath), Mode: fsMode, - Data: pathReader, + Data: getReader(), Link: tarHeader.Linkname, }) if err != nil { diff --git a/internal/deb/extract_test.go b/internal/deb/extract_test.go index e86b4553..f18ad24f 100644 --- a/internal/deb/extract_test.go +++ b/internal/deb/extract_test.go @@ -2,6 +2,8 @@ package deb_test import ( "bytes" + "io" + "io/fs" . "gopkg.in/check.v1" @@ -470,3 +472,229 @@ func (s *S) TestExtract(c *C) { c.Assert(result, DeepEquals, test.result) } } + +type callbackTest struct { + summary string + pkgdata []byte + extract map[string][]deb.ExtractInfo + callbacks [][]any + noConsume bool + noData bool +} + +var callbackTests = []callbackTest{{ + summary: "Trivial", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./a/b/"), + Reg(0601, "./a/b/c", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/**": []deb.ExtractInfo{{Path: "/**"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"data", "/a/b/c", 0, []byte{}}, + {"create", "/a/b/c", "/a/b/c", "", 0601}, + }, +}, { + summary: "Data", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Reg(0601, "./a/b", "foo"), + Reg(0602, "./a/c", "bar"), + }), + extract: map[string][]deb.ExtractInfo{ + "/**": []deb.ExtractInfo{{Path: "/**"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"data", "/a/b", 3, []byte("foo")}, + {"create", "/a/b", "/a/b", "", 0601}, + {"data", "/a/c", 3, []byte("bar")}, + {"create", "/a/c", "/a/c", "", 0602}, + }, +}, { + summary: "Symlinks", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a", ""), + Lnk(0777, "./b", "/a"), + Lnk(0777, "./c", "/d"), + }), + extract: map[string][]deb.ExtractInfo{ + "/**": []deb.ExtractInfo{{Path: "/**"}}, + }, + noData: true, + callbacks: [][]any{ + {"create", "/a", "/a", "", 0601}, + {"create", "/b", "/b", "/a", 0777}, + {"create", "/c", "/c", "/d", 0777}, + }, +}, { + summary: "Simple copied paths", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a", ""), + Reg(0602, "./b", ""), + Dir(0701, "./c/"), + Reg(0603, "./c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a": []deb.ExtractInfo{{Path: "/a"}}, + "/c/d": []deb.ExtractInfo{{Path: "/b"}}, + }, + noData: true, + callbacks: [][]any{ + {"create", "/a", "/a", "", 0601}, + {"create", "/c/d", "/b", "", 0603}, + }, +}, { + summary: "Parent directories", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./a/b/"), + Dir(0703, "./a/b/c/"), + Reg(0601, "./a/b/c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b/c/": []deb.ExtractInfo{{Path: "/a/b/c/"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"create", "/a/b/c/", "/a/b/c/", "", 0703}, + }, +}, { + summary: "Parent directories with globs", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Dir(0702, "./a/b/"), + Dir(0703, "./a/b/c/"), + Reg(0601, "./a/b/c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b/*/": []deb.ExtractInfo{{Path: "/a/b/*/"}}, + }, + callbacks: [][]any{ + {"create", "/a/", "/a/", "", 0701}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"create", "/a/b/c/", "/a/b/c/", "", 0703}, + }, +}, { + summary: "Parent directories out of order", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a/b/c/d", ""), + Dir(0703, "./a/b/c/"), + Dir(0702, "./a/b/"), + Dir(0701, "./a/"), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b/*/": []deb.ExtractInfo{{Path: "/a/b/*/"}}, + }, + callbacks: [][]any{ + {"create", "", "/a/", "", 0755}, + {"create", "", "/a/b/", "", 0755}, + {"create", "/a/b/c/", "/a/b/c/", "", 0703}, + {"create", "/a/b/", "/a/b/", "", 0702}, + {"create", "/a/", "/a/", "", 0701}, + }, +}, { + summary: "Parent directories with early copy path", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Dir(0701, "./a/"), + Reg(0601, "./a/b", ""), + Dir(0702, "./c/"), + Reg(0602, "./c/d", ""), + }), + extract: map[string][]deb.ExtractInfo{ + "/a/b": []deb.ExtractInfo{{Path: "/c/d"}}, + }, + noData: true, + callbacks: [][]any{ + {"create", "", "/c/", "", 0755}, + {"create", "/a/b", "/c/d", "", 0601}, + {"create", "/c/", "/c/", "", 0702}, + }, +}, { + summary: "Same file twice with different content", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a", "foo"), + Reg(0602, "./b", "bar"), + Reg(0603, "./a", "baz"), + }), + extract: map[string][]deb.ExtractInfo{ + "/*": []deb.ExtractInfo{{Path: "/*"}}, + }, + callbacks: [][]any{ + {"data", "/a", 3, []byte("foo")}, + {"create", "/a", "/a", "", 0601}, + {"data", "/b", 3, []byte("bar")}, + {"create", "/b", "/b", "", 0602}, + {"data", "/a", 3, []byte("baz")}, + {"create", "/a", "/a", "", 0603}, + }, +}, { + summary: "Source with multiple targets", + pkgdata: testutil.MustMakeDeb([]testutil.TarEntry{ + Reg(0601, "./a", "aaa"), + Reg(0602, "./b", "bu bu bu"), + }), + extract: map[string][]deb.ExtractInfo{ + "/a": []deb.ExtractInfo{{Path: "/b"}}, + "/b": []deb.ExtractInfo{ + {Path: "/c", Mode: 0603}, + {Path: "/d"}, + }, + }, + callbacks: [][]any{ + {"data", "/a", 3, []byte("aaa")}, + {"create", "/a", "/b", "", 0601}, + {"data", "/b", 8, []byte("bu bu bu")}, + {"create", "/b", "/c", "", 0603}, + {"create", "/b", "/d", "", 0602}, + }, +}} + +func (s *S) TestExtractCallbacks(c *C) { + for _, test := range callbackTests { + c.Logf("Test: %s", test.summary) + dir := c.MkDir() + var callbacks [][]any + onData := func(source string, size int64) (deb.ConsumeData, error) { + if test.noConsume { + args := []any{"data", source, int(size), nil} + callbacks = append(callbacks, args) + return nil, nil + } + consume := func(reader io.Reader) error { + data, err := io.ReadAll(reader) + if err != nil { + return err + } + args := []any{"data", source, int(size), data} + callbacks = append(callbacks, args) + return nil + } + return consume, nil + } + if test.noData { + onData = nil + } + onCreate := func(source, target, link string, mode fs.FileMode) error { + modeInt := int(07777 & mode) + args := []any{"create", source, target, link, modeInt} + callbacks = append(callbacks, args) + return nil + } + options := deb.ExtractOptions{ + Package: "test", + TargetDir: dir, + Extract: test.extract, + OnData: onData, + OnCreate: onCreate, + } + err := deb.Extract(bytes.NewBuffer(test.pkgdata), &options) + c.Assert(err, IsNil) + c.Assert(callbacks, DeepEquals, test.callbacks) + } +}