diff --git a/binary/scalibr.go b/binary/scalibr.go index 85118d87..508d7401 100644 --- a/binary/scalibr.go +++ b/binary/scalibr.go @@ -32,7 +32,7 @@ func main() { func parseFlags() *cli.Flags { // TODO(b/279138598): Make this OS-agnostic, e.g. don't use the unix-specific root path. - root := flag.String("root", "/", `The root directory to start all extractions/detections from (e.g.: "/", "c:\" or ".")`) + root := flag.String("root", "/", `The root dir used by detectors and by file walking during extraction (e.g.: "/", "c:\" or ".")`) resultFile := flag.String("result", "", "The path of the output scan result file") var output cli.Array flag.Var(&output, "o", "The path of the scanner outputs in various formats, e.g. -o textproto=result.textproto -o spdx23-json=result.spdx.json") diff --git a/extractor/extractor.go b/extractor/extractor.go index da9b1a72..2ab2b05b 100644 --- a/extractor/extractor.go +++ b/extractor/extractor.go @@ -53,8 +53,9 @@ type InventoryExtractor interface { // ScanInput describes one file to extract from. type ScanInput struct { + // The path of the file to extract, relative to ScanRoot. Path string - // The root directory to start all extractions from. + // The root directory where the extraction file walking started from. ScanRoot string Info fs.FileInfo // A reader for accessing contents of the file. @@ -67,11 +68,17 @@ type Config struct { Extractors []InventoryExtractor ScanRoot string FS fs.FS - // Directories that the file system walk should ignore. Note that this is not - // relative to ScanRoot and thus needs to be a sub-directory of ScanRoot. + // Optional: Individual files to extract inventory from. If specified, the + // extractors will only look at these files during the filesystem traversal. + // Note that these are not relative to ScanRoot and thus need to be in + // sub-directories of ScanRoot. + FilesToExtract []string + // Optional: Directories that the file system walk should ignore. + // Note that these are not relative to ScanRoot and thus need to be + // sub-directories of ScanRoot. // TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs" DirsToSkip []string - // If the regex matches a directory, it will be skipped. + // Optional: If the regex matches a directory, it will be skipped. SkipDirRegex *regexp.Regexp // Optional: stats allows to enter a metric hook. If left nil, no metrics will be recorded. Stats stats.Collector @@ -125,21 +132,26 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status, if err != nil { return nil, nil, err } + filesToExtract, err := stripPathPrefix(config.FilesToExtract, scanRoot) + if err != nil { + return nil, nil, err + } dirsToSkip, err := stripPathPrefix(config.DirsToSkip, scanRoot) if err != nil { return nil, nil, err } wc := walkContext{ - ctx: ctx, - stats: config.Stats, - extractors: config.Extractors, - fs: config.FS, - scanRoot: scanRoot, - dirsToSkip: stringListToMap(dirsToSkip), - skipDirRegex: config.SkipDirRegex, - readSymlinks: config.ReadSymlinks, - maxInodes: config.MaxInodes, - inodesVisited: 0, + ctx: ctx, + stats: config.Stats, + extractors: config.Extractors, + fs: config.FS, + scanRoot: scanRoot, + filesToExtract: filesToExtract, + dirsToSkip: stringListToMap(dirsToSkip), + skipDirRegex: config.SkipDirRegex, + readSymlinks: config.ReadSymlinks, + maxInodes: config.MaxInodes, + inodesVisited: 0, lastStatus: time.Now(), @@ -151,7 +163,11 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status, mapExtracts: make(map[string]int), } - err = internal.WalkDirUnsorted(config.FS, ".", wc.handleFile) + if len(wc.filesToExtract) > 0 { + err = walkIndividualFiles(config.FS, wc.filesToExtract, wc.handleFile) + } else { + err = internal.WalkDirUnsorted(config.FS, ".", wc.handleFile) + } log.Infof("End status: %d inodes visited, %d Extract calls, %s elapsed", wc.inodesVisited, wc.extractCalls, time.Since(start)) @@ -161,15 +177,16 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status, } type walkContext struct { - ctx context.Context - stats stats.Collector - extractors []InventoryExtractor - fs fs.FS - scanRoot string - dirsToSkip map[string]bool // Anything under these paths should be skipped. - skipDirRegex *regexp.Regexp - maxInodes int - inodesVisited int + ctx context.Context + stats stats.Collector + extractors []InventoryExtractor + fs fs.FS + scanRoot string + filesToExtract []string + dirsToSkip map[string]bool // Anything under these paths should be skipped. + skipDirRegex *regexp.Regexp + maxInodes int + inodesVisited int // Inventories found. inventory []*Inventory @@ -191,6 +208,21 @@ type walkContext struct { mapExtracts map[string]int } +func walkIndividualFiles(fsys fs.FS, paths []string, fn fs.WalkDirFunc) error { + for _, p := range paths { + info, err := fs.Stat(fsys, p) + if err != nil { + err = fn(p, nil, err) + } else { + err = fn(p, fs.FileInfoToDirEntry(info), nil) + } + if err != nil { + return err + } + } + return nil +} + func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error { wc.printStatus(path) diff --git a/extractor/extractor_test.go b/extractor/extractor_test.go index e08b575b..039632aa 100644 --- a/extractor/extractor_test.go +++ b/extractor/extractor_test.go @@ -59,6 +59,7 @@ func TestRun(t *testing.T) { testCases := []struct { desc string ex []extractor.InventoryExtractor + filesToExtract []string dirsToSkip []string skipDirRegex string maxInodes int @@ -231,6 +232,57 @@ func TestRun(t *testing.T) { }, wantInodeCount: 6, }, + { + desc: "Extract specific file", + ex: []extractor.InventoryExtractor{ + fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}), + fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}), + }, + filesToExtract: []string{path2}, + wantInv: []*extractor.Inventory{ + &extractor.Inventory{ + Name: name2, + Locations: []string{path2}, + Extractor: "ex2", + }, + }, + wantStatus: []*plugin.Status{ + &plugin.Status{Name: "ex1", Version: 1, Status: success}, + &plugin.Status{Name: "ex2", Version: 2, Status: success}, + }, + wantInodeCount: 1, + }, + { + desc: "Extract specific file with absolute path", + ex: []extractor.InventoryExtractor{ + fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}), + fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}), + }, + // ScanRoot is CWD + filesToExtract: []string{path.Join(cwd, path2)}, + wantInv: []*extractor.Inventory{ + &extractor.Inventory{ + Name: name2, + Locations: []string{path2}, + Extractor: "ex2", + }, + }, + wantStatus: []*plugin.Status{ + &plugin.Status{Name: "ex1", Version: 1, Status: success}, + &plugin.Status{Name: "ex2", Version: 2, Status: success}, + }, + wantInodeCount: 1, + }, + { + desc: "Extract specific file not relative to ScanRoot", + ex: []extractor.InventoryExtractor{ + fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}), + fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}), + }, + // ScanRoot is CWD, filepath is in its parent dir. + filesToExtract: []string{path.Join(filepath.Dir(cwd), path2)}, + wantErr: cmpopts.AnyError, + }, { desc: "nil result", ex: []extractor.InventoryExtractor{ @@ -342,13 +394,14 @@ func TestRun(t *testing.T) { skipDirRegex = regexp.MustCompile(tc.skipDirRegex) } config := &extractor.Config{ - Extractors: tc.ex, - DirsToSkip: tc.dirsToSkip, - SkipDirRegex: skipDirRegex, - MaxInodes: tc.maxInodes, - ScanRoot: ".", - FS: fsys, - Stats: fc, + Extractors: tc.ex, + FilesToExtract: tc.filesToExtract, + DirsToSkip: tc.dirsToSkip, + SkipDirRegex: skipDirRegex, + MaxInodes: tc.maxInodes, + ScanRoot: ".", + FS: fsys, + Stats: fc, } gotInv, gotStatus, err := extractor.RunFS(context.Background(), config) if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" { diff --git a/scalibr.go b/scalibr.go index e5cbdae7..ab0ad9c2 100644 --- a/scalibr.go +++ b/scalibr.go @@ -42,9 +42,19 @@ func New() *Scanner { return &Scanner{} } type ScanConfig struct { InventoryExtractors []extractor.InventoryExtractor Detectors []detector.Detector - ScanRoot string - // Directories that the file system walk should ignore. Note that this is not - // relative to ScanRoot and thus needs to be a sub-directory of ScanRoot. + // ScanRoot is the root dir used by file walking during extraction. + // All extractors and detectors will assume files are relative to this dir. + // Example use case: Scanning a container image or source code repo that is + // mounted to a local dir. + ScanRoot string + // Optional: Individual files to extract inventory from. If specified, the + // extractors will only look at these files during the filesystem traversal. + // Note that these are not relative to ScanRoot and thus need to be in + // sub-directories of ScanRoot. + FilesToExtract []string + // Optional: Directories that the file system walk should ignore. + // Note that these are not relative to ScanRoot and thus need to be + // sub-directories of ScanRoot. // TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs" DirsToSkip []string // Optional: If the regex matches a directory, it will be skipped. @@ -88,13 +98,14 @@ func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) { Findings: []*detector.Finding{}, } extractorConfig := &extractor.Config{ - Stats: config.Stats, - ReadSymlinks: config.ReadSymlinks, - Extractors: config.InventoryExtractors, - DirsToSkip: config.DirsToSkip, - SkipDirRegex: config.SkipDirRegex, - ScanRoot: config.ScanRoot, - MaxInodes: config.MaxInodes, + Stats: config.Stats, + ReadSymlinks: config.ReadSymlinks, + Extractors: config.InventoryExtractors, + FilesToExtract: config.FilesToExtract, + DirsToSkip: config.DirsToSkip, + SkipDirRegex: config.SkipDirRegex, + ScanRoot: config.ScanRoot, + MaxInodes: config.MaxInodes, } inventories, extractorStatus, err := extractor.Run(ctx, extractorConfig) sro.Inventories = inventories