Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add scan config option to extract individual files. #25

Merged
merged 1 commit into from
Apr 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion binary/scalibr.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func main() {

func parseFlags() *cli.Flags {
// TODO(b/279138598): Make this OS-agnostic, e.g. don't use the unix-specific root path.
root := flag.String("root", "/", `The root directory to start all extractions/detections from (e.g.: "/", "c:\" or ".")`)
root := flag.String("root", "/", `The root dir used by detectors and by file walking during extraction (e.g.: "/", "c:\" or ".")`)
resultFile := flag.String("result", "", "The path of the output scan result file")
var output cli.Array
flag.Var(&output, "o", "The path of the scanner outputs in various formats, e.g. -o textproto=result.textproto -o spdx23-json=result.spdx.json")
Expand Down
80 changes: 56 additions & 24 deletions extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ type InventoryExtractor interface {

// ScanInput describes one file to extract from.
type ScanInput struct {
// The path of the file to extract, relative to ScanRoot.
Path string
// The root directory to start all extractions from.
// The root directory from which the extraction file walk started.
ScanRoot string
Info fs.FileInfo
// A reader for accessing contents of the file.
Expand All @@ -67,11 +68,17 @@ type Config struct {
Extractors []InventoryExtractor
ScanRoot string
FS fs.FS
// Directories that the file system walk should ignore. Note that this is not
// relative to ScanRoot and thus needs to be a sub-directory of ScanRoot.
// Optional: Individual files to extract inventory from. If specified, the
// extractors will only look at these files during the filesystem traversal.
// Note that these are not relative to ScanRoot and thus need to be in
// sub-directories of ScanRoot.
FilesToExtract []string
// Optional: Directories that the file system walk should ignore.
// Note that these are not relative to ScanRoot and thus need to be
// sub-directories of ScanRoot.
// TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs"
DirsToSkip []string
// If the regex matches a directory, it will be skipped.
// Optional: If the regex matches a directory, it will be skipped.
SkipDirRegex *regexp.Regexp
// Optional: Stats lets callers plug in a metrics hook. If left nil, no metrics will be recorded.
Stats stats.Collector
Expand Down Expand Up @@ -125,21 +132,26 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
if err != nil {
return nil, nil, err
}
filesToExtract, err := stripPathPrefix(config.FilesToExtract, scanRoot)
if err != nil {
return nil, nil, err
}
dirsToSkip, err := stripPathPrefix(config.DirsToSkip, scanRoot)
if err != nil {
return nil, nil, err
}
wc := walkContext{
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: scanRoot,
dirsToSkip: stringListToMap(dirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
inodesVisited: 0,
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: scanRoot,
filesToExtract: filesToExtract,
dirsToSkip: stringListToMap(dirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
inodesVisited: 0,

lastStatus: time.Now(),

Expand All @@ -151,7 +163,11 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
mapExtracts: make(map[string]int),
}

err = internal.WalkDirUnsorted(config.FS, ".", wc.handleFile)
if len(wc.filesToExtract) > 0 {
err = walkIndividualFiles(config.FS, wc.filesToExtract, wc.handleFile)
} else {
err = internal.WalkDirUnsorted(config.FS, ".", wc.handleFile)
}

log.Infof("End status: %d inodes visited, %d Extract calls, %s elapsed",
wc.inodesVisited, wc.extractCalls, time.Since(start))
Expand All @@ -161,15 +177,16 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}

type walkContext struct {
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
filesToExtract []string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int

// Inventories found.
inventory []*Inventory
Expand All @@ -191,6 +208,21 @@ type walkContext struct {
mapExtracts map[string]int
}

// walkIndividualFiles calls fn once for every entry of paths, in the order
// given, instead of traversing a directory tree.
//
// Each path is resolved with fs.Stat against fsys. If the stat succeeds, fn
// receives the path, a DirEntry derived from the resulting FileInfo and a nil
// error. If the stat fails, fn receives the path, a nil DirEntry and the stat
// error, so the callback decides whether the failure is fatal.
//
// The value returned by fn is interpreted along the lines of fs.WalkDir, so
// that the control-flow sentinels of fs.WalkDirFunc never leak to the caller
// as real errors:
//   - fs.SkipDir skips only the current path (there is no subtree to prune
//     in a flat list) and the iteration continues with the next path;
//   - fs.SkipAll stops the iteration and nil is returned;
//   - any other non-nil error stops the iteration and is returned as is.
func walkIndividualFiles(fsys fs.FS, paths []string, fn fs.WalkDirFunc) error {
	for _, p := range paths {
		var err error
		if info, statErr := fs.Stat(fsys, p); statErr != nil {
			// Report the failure through the callback rather than aborting here.
			err = fn(p, nil, statErr)
		} else {
			err = fn(p, fs.FileInfoToDirEntry(info), nil)
		}

		// Sentinels are compared by identity, as fs.WalkDir itself does.
		switch err {
		case nil, fs.SkipDir:
			continue
		case fs.SkipAll:
			return nil
		default:
			return err
		}
	}
	return nil
}

func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error {
wc.printStatus(path)

Expand Down
67 changes: 60 additions & 7 deletions extractor/extractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ func TestRun(t *testing.T) {
testCases := []struct {
desc string
ex []extractor.InventoryExtractor
filesToExtract []string
dirsToSkip []string
skipDirRegex string
maxInodes int
Expand Down Expand Up @@ -231,6 +232,57 @@ func TestRun(t *testing.T) {
},
wantInodeCount: 6,
},
{
desc: "Extract specific file",
ex: []extractor.InventoryExtractor{
fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}),
fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}),
},
filesToExtract: []string{path2},
wantInv: []*extractor.Inventory{
&extractor.Inventory{
Name: name2,
Locations: []string{path2},
Extractor: "ex2",
},
},
wantStatus: []*plugin.Status{
&plugin.Status{Name: "ex1", Version: 1, Status: success},
&plugin.Status{Name: "ex2", Version: 2, Status: success},
},
wantInodeCount: 1,
},
{
desc: "Extract specific file with absolute path",
ex: []extractor.InventoryExtractor{
fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}),
fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}),
},
// ScanRoot is CWD
filesToExtract: []string{path.Join(cwd, path2)},
wantInv: []*extractor.Inventory{
&extractor.Inventory{
Name: name2,
Locations: []string{path2},
Extractor: "ex2",
},
},
wantStatus: []*plugin.Status{
&plugin.Status{Name: "ex1", Version: 1, Status: success},
&plugin.Status{Name: "ex2", Version: 2, Status: success},
},
wantInodeCount: 1,
},
{
desc: "Extract specific file not relative to ScanRoot",
ex: []extractor.InventoryExtractor{
fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}),
fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}),
},
// ScanRoot is CWD, filepath is in its parent dir.
filesToExtract: []string{path.Join(filepath.Dir(cwd), path2)},
wantErr: cmpopts.AnyError,
},
{
desc: "nil result",
ex: []extractor.InventoryExtractor{
Expand Down Expand Up @@ -342,13 +394,14 @@ func TestRun(t *testing.T) {
skipDirRegex = regexp.MustCompile(tc.skipDirRegex)
}
config := &extractor.Config{
Extractors: tc.ex,
DirsToSkip: tc.dirsToSkip,
SkipDirRegex: skipDirRegex,
MaxInodes: tc.maxInodes,
ScanRoot: ".",
FS: fsys,
Stats: fc,
Extractors: tc.ex,
FilesToExtract: tc.filesToExtract,
DirsToSkip: tc.dirsToSkip,
SkipDirRegex: skipDirRegex,
MaxInodes: tc.maxInodes,
ScanRoot: ".",
FS: fsys,
Stats: fc,
}
gotInv, gotStatus, err := extractor.RunFS(context.Background(), config)
if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" {
Expand Down
31 changes: 21 additions & 10 deletions scalibr.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,19 @@ func New() *Scanner { return &Scanner{} }
type ScanConfig struct {
InventoryExtractors []extractor.InventoryExtractor
Detectors []detector.Detector
ScanRoot string
// Directories that the file system walk should ignore. Note that this is not
// relative to ScanRoot and thus needs to be a sub-directory of ScanRoot.
// ScanRoot is the root dir used by file walking during extraction.
// All extractors and detectors will assume files are relative to this dir.
// Example use case: Scanning a container image or source code repo that is
// mounted to a local dir.
ScanRoot string
// Optional: Individual files to extract inventory from. If specified, the
// extractors will only look at these files during the filesystem traversal.
// Note that these are not relative to ScanRoot and thus need to be in
// sub-directories of ScanRoot.
FilesToExtract []string
// Optional: Directories that the file system walk should ignore.
// Note that these are not relative to ScanRoot and thus need to be
// sub-directories of ScanRoot.
// TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs"
DirsToSkip []string
// Optional: If the regex matches a directory, it will be skipped.
Expand Down Expand Up @@ -88,13 +98,14 @@ func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) {
Findings: []*detector.Finding{},
}
extractorConfig := &extractor.Config{
Stats: config.Stats,
ReadSymlinks: config.ReadSymlinks,
Extractors: config.InventoryExtractors,
DirsToSkip: config.DirsToSkip,
SkipDirRegex: config.SkipDirRegex,
ScanRoot: config.ScanRoot,
MaxInodes: config.MaxInodes,
Stats: config.Stats,
ReadSymlinks: config.ReadSymlinks,
Extractors: config.InventoryExtractors,
FilesToExtract: config.FilesToExtract,
DirsToSkip: config.DirsToSkip,
SkipDirRegex: config.SkipDirRegex,
ScanRoot: config.ScanRoot,
MaxInodes: config.MaxInodes,
}
inventories, extractorStatus, err := extractor.Run(ctx, extractorConfig)
sro.Inventories = inventories
Expand Down
Loading