Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rpm: add function to determine if paths are RPM #1473

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ type FileKind string

const (
	// FileKindWhiteout is the FileKind whose value is "whiteout".
	// NOTE(review): presumably marks whiteout (deletion marker) entries
	// found in a layer — confirm against the consumers of FileKind.
	FileKindWhiteout = FileKind("whiteout")
	// FileKindRPM is the FileKind whose value is "rpm".
	// NOTE(review): presumably marks files that originate from an RPM
	// package — confirm against the consumers of FileKind.
	FileKindRPM = FileKind("rpm")
)

// File represents interesting files that are found in the layer.
Expand Down
16 changes: 15 additions & 1 deletion gobin/gobin.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (

"github.com/quay/claircore"
"github.com/quay/claircore/indexer"
"github.com/quay/claircore/rpm"
)

// Detector detects go binaries and reports the packages used to build them.
Expand Down Expand Up @@ -86,7 +87,8 @@ func (Detector) Scan(ctx context.Context, l *claircore.Layer) ([]*claircore.Pack
// Only create a single spool file per call, re-use for every binary.
var spool spoolfile
walk := func(p string, d fs.DirEntry, err error) error {
ctx := zlog.ContextWithValues(ctx, "path", d.Name())
ctx := zlog.ContextWithValues(ctx, "filename", d.Name())

switch {
case err != nil:
return err
Expand All @@ -107,6 +109,18 @@ func (Detector) Scan(ctx context.Context, l *claircore.Layer) ([]*claircore.Pack
// Not executable
return nil
}

isRPM, err := rpm.FileInstalledByRPM(ctx, l, p)
if err != nil {
return err
}
if isRPM {
zlog.Debug(ctx).
Str("path", p).
Msg("file path determined to be of RPM origin")
return nil
}

f, err := sys.Open(p)
if err != nil {
// TODO(crozzy): Remove log line once controller is in a
Expand Down
11 changes: 11 additions & 0 deletions gobin/gobin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,12 @@ func TestEmptyFile(t *testing.T) {
if err := l.Init(ctx, &test.AnyDescription, f); err != nil {
t.Error(err)
}
t.Cleanup(func() {
if err := l.Close(); err != nil {
t.Error(err)
}
})

var s Detector
_, err = s.Scan(ctx, &l)
if err != nil {
Expand Down Expand Up @@ -138,6 +144,11 @@ func TestScanner(t *testing.T) {
if err := l.Init(ctx, &test.AnyDescription, f); err != nil {
t.Error(err)
}
t.Cleanup(func() {
if err := l.Close(); err != nil {
t.Error(err)
}
})

// Run the scanner on the fake layer.
var s Detector
Expand Down
12 changes: 12 additions & 0 deletions java/packagescanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"github.com/quay/claircore"
"github.com/quay/claircore/indexer"
"github.com/quay/claircore/java/jar"
"github.com/quay/claircore/rpm"
)

var (
Expand Down Expand Up @@ -147,6 +148,17 @@ func (s *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*claircor
defer putBuf(buf)
for _, n := range ars {
ctx := zlog.ContextWithValues(ctx, "file", n)
isRPM, err := rpm.FileInstalledByRPM(ctx, layer, n)
if err != nil {
return nil, err
}
if isRPM {
zlog.Debug(ctx).
Str("path", n).
Msg("file path determined to be of RPM origin")
continue
}

sh.Reset()
buf.Reset()
// Calculate the SHA1 as it's buffered, since it may be needed for
Expand Down
12 changes: 12 additions & 0 deletions nodejs/packagescanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/Masterminds/semver"
"github.com/quay/claircore"
"github.com/quay/claircore/indexer"
"github.com/quay/claircore/rpm"
)

const repository = "npm"
Expand Down Expand Up @@ -92,6 +93,17 @@ func (s *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*claircor
ret := make([]*claircore.Package, 0, len(pkgs))
var invalidPkgs []string
for _, p := range pkgs {
isRPM, err := rpm.FileInstalledByRPM(ctx, layer, p)
if err != nil {
return nil, err
}
if isRPM {
zlog.Debug(ctx).
Str("path", p).
Msg("file path determined to be of RPM origin")
continue
}

f, err := sys.Open(p)
if err != nil {
return nil, fmt.Errorf("nodejs: unable to open file %q: %w", p, err)
Expand Down
19 changes: 15 additions & 4 deletions python/packagescanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"github.com/quay/claircore"
"github.com/quay/claircore/indexer"
"github.com/quay/claircore/pkg/pep440"
"github.com/quay/claircore/rpm"
)

var (
Expand Down Expand Up @@ -79,6 +80,16 @@ func (ps *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*clairco
}
var ret []*claircore.Package
for _, n := range ms {
isRPM, err := rpm.FileInstalledByRPM(ctx, layer, n)
if err != nil {
return nil, err
}
if isRPM {
zlog.Debug(ctx).
Str("path", n).
Msg("file path determined to be of RPM origin")
continue
}
b, err := fs.ReadFile(sys, n)
if err != nil {
return nil, fmt.Errorf("python: unable to read file: %w", err)
Expand Down Expand Up @@ -143,14 +154,14 @@ func findDeliciousEgg(ctx context.Context, sys fs.FS) (out []string, err error)
// Is this layer an rpm layer?
//
// If so, files in the distro-managed directory can be skipped.
var rpm bool
var isRPM bool
for _, p := range []string{
"var/lib/rpm/Packages",
"var/lib/rpm/rpmdb.sqlite",
"var/lib/rpm/Packages.db",
} {
if fi, err := fs.Stat(sys, p); err == nil && fi.Mode().IsRegular() {
rpm = true
isRPM = true
break
}
}
Expand All @@ -172,12 +183,12 @@ func findDeliciousEgg(ctx context.Context, sys fs.FS) (out []string, err error)
switch {
case err != nil:
return err
case (rpm || dpkg) && d.Type().IsDir():
case (isRPM || dpkg) && d.Type().IsDir():
// Skip one level up from the "packages" directory so the walk also
// skips the standard library.
var pat string
switch {
case rpm:
case isRPM:
pat = `usr/lib*/python[23].*`
ev = ev.Bool("rpm_dir", true)
case dpkg:
Expand Down
142 changes: 142 additions & 0 deletions rpm/files.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
package rpm

import (
"context"
"errors"
"fmt"
"io/fs"
"sync"

"github.com/quay/claircore"
"github.com/quay/zlog"
"golang.org/x/sync/singleflight"
)

// FilesCache memoizes, per layer, the set of file paths reported by the
// layer's RPM databases and makes that data safe for concurrent use.
//
// Entries are keyed by the layer's hash string and are reference counted so
// that they can be dropped once no caller is using them (see get and gc).
type filesCache struct {
	// C maps a layer key to the set of file paths found for that layer.
	c map[string]map[string]struct{}
	// Refs counts the outstanding users of each key.
	refs map[string]int
	// Mu guards c and refs.
	mu sync.Mutex
	// Sf collapses concurrent lookups for the same key into a single call.
	sf singleflight.Group
	// This wg is added so tests can wait for goroutines to finish to
	// be able to confirm functionality.
	wg sync.WaitGroup
}

// Fc is the package-wide cache consulted by FileInstalledByRPM. Both maps
// are allocated up front because writing to a nil map panics.
var fc = &filesCache{
	c:    make(map[string]map[string]struct{}),
	refs: make(map[string]int),
}

// GC releases one reference to key.
//
// While references remain, only the counter is updated. Once the count
// reaches zero (or below), the cached file set and the counter for key are
// removed and the singleflight group is told to forget the key.
func (fc *filesCache) gc(key string) {
	fc.mu.Lock()
	defer fc.mu.Unlock()

	remaining := fc.refs[key] - 1
	if remaining > 0 {
		fc.refs[key] = remaining
		return
	}

	// Last reference is gone: drop everything associated with the key.
	delete(fc.c, key)
	delete(fc.refs, key)
	fc.sf.Forget(key)
}

// Get takes a reference on key and reports the cached file set for it, if
// any. The boolean result is false when nothing is cached for key.
//
// The reference is released by a goroutine that waits for ctx to be done and
// then calls gc. The goroutine is registered with fc.wg so tests can wait
// for it.
//
// NOTE(review): one goroutine is started per call and it only exits once ctx
// is done, so callers should pass a context that is eventually cancelled.
func (fc *filesCache) get(ctx context.Context, key string) (map[string]struct{}, bool) {
	fc.mu.Lock()
	defer fc.mu.Unlock()

	fc.refs[key]++

	// The release cannot run gc before this method returns: gc needs fc.mu,
	// which is held until the deferred unlock.
	release := func() {
		defer fc.wg.Done()
		<-ctx.Done()
		fc.gc(key)
	}
	fc.wg.Add(1)
	go release()

	cached, hit := fc.c[key]
	return cached, hit
}

// Set stores files as the cached file set for key, replacing any previous
// entry. It takes fc.mu itself, so callers must not already hold the lock.
func (fc *filesCache) set(key string, files map[string]struct{}) {
	fc.mu.Lock()
	defer fc.mu.Unlock()
	fc.c[key] = files
}

// ErrNoDBFound is reported by the database search in getFiles when the layer
// contains no RPM database. getFiles treats it as "no RPM-owned files"
// rather than as a failure.
var errNoDBFound = errors.New("no RPM DB found")

// GetFiles returns the set of file paths recorded in the RPM databases of the
// provided [claircore.Layer], using the filesFromDB function, and memoizes
// the result per layer hash to avoid repeated work for the same layer.
//
// Concurrent callers asking about the same layer share a single lookup; the
// lookup itself runs with the context of whichever caller started it. A
// layer that contains no RPM database yields an empty, non-nil set and a nil
// error. That negative result is cached too, so later calls for the same
// layer do not walk its filesystem again.
//
// If ctx is done before the lookup finishes, the cause of the cancellation
// is returned. Any other lookup failure is returned as-is.
func (fc *filesCache) getFiles(ctx context.Context, layer *claircore.Layer) (map[string]struct{}, error) {
	key := layer.Hash.String()
	if files, ok := fc.get(ctx, key); ok {
		return files, nil
	}

	ch := fc.sf.DoChan(key, func() (any, error) {
		sys, err := layer.FS()
		if err != nil {
			return nil, fmt.Errorf("rpm: unable to open layer: %w", err)
		}

		found := make([]foundDB, 0)
		if err := fs.WalkDir(sys, ".", findDBs(ctx, &found, sys)); err != nil {
			return nil, fmt.Errorf("rpm: error walking fs: %w", err)
		}

		// The result set is owned by this closure so that it is never
		// shared with the stack frame of a caller that may already have
		// returned because its context was done.
		files := map[string]struct{}{}
		if len(found) == 0 {
			// Remember that this layer has no RPM database; otherwise every
			// subsequent query for the layer would repeat the walk above.
			fc.set(key, files)
			return nil, errNoDBFound
		}

		done := map[string]struct{}{}
		zlog.Debug(ctx).Int("count", len(found)).Msg("found possible databases")
		for _, db := range found {
			ctx := zlog.ContextWithValues(ctx, "db", db.String())
			zlog.Debug(ctx).Msg("examining database")
			if _, ok := done[db.Path]; ok {
				zlog.Debug(ctx).Msg("already seen, skipping")
				continue
			}
			done[db.Path] = struct{}{}
			// Named objs (not fs) so the io/fs package is not shadowed.
			objs, err := getDBObjects(ctx, sys, db, filesFromDB)
			if err != nil {
				return nil, fmt.Errorf("rpm: error getting native DBs: %w", err)
			}
			for _, f := range objs {
				files[f.Path] = struct{}{}
			}
		}
		fc.set(key, files)
		return files, nil
	})

	select {
	case <-ctx.Done():
		return nil, context.Cause(ctx)
	case res := <-ch:
		switch {
		case res.Err == nil:
			return res.Val.(map[string]struct{}), nil
		case errors.Is(res.Err, errNoDBFound):
			// No database means no file can be of RPM origin.
			return map[string]struct{}{}, nil
		default:
			return nil, res.Err
		}
	}
}

// FileInstalledByRPM reports whether filepath is listed among the files that
// the RPM databases in the given [claircore.Layer] record.
//
// The lookup goes through the package-level cache. An error from that lookup
// is returned unchanged together with false.
func FileInstalledByRPM(ctx context.Context, layer *claircore.Layer, filepath string) (bool, error) {
	installed, err := fc.getFiles(ctx, layer)
	if err != nil {
		return false, err
	}
	if _, ok := installed[filepath]; ok {
		return true, nil
	}
	return false, nil
}
Loading
Loading