Skip to content

Commit

Permalink
Add scan config option to extract individual files.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 628062325
  • Loading branch information
erikvarga authored and copybara-github committed Apr 25, 2024
1 parent 18557e3 commit 853a4bc
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 35 deletions.
65 changes: 45 additions & 20 deletions extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ type Config struct {
Extractors []InventoryExtractor
ScanRoot string
FS fs.FS
// Individual files to extract inventory from. If specified, the extractors will
// only look at these files and not traverse the filesystem.
FilesToExtract []string
// Directories that the file system walk should ignore, relative to the FS root.
// TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs"
DirsToSkip []string
Expand Down Expand Up @@ -120,16 +123,17 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}
start := time.Now()
wc := walkContext{
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: config.ScanRoot,
dirsToSkip: stringListToMap(config.DirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
inodesVisited: 0,
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: config.ScanRoot,
filesToExtract: config.FilesToExtract,
dirsToSkip: stringListToMap(config.DirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
inodesVisited: 0,

lastStatus: time.Now(),

Expand All @@ -141,7 +145,12 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
mapExtracts: make(map[string]int),
}

err := internal.WalkDirUnsorted(config.FS, ".", wc.handleFile)
var err error
if len(wc.filesToExtract) > 0 {
err = walkIndividualFiles(config.FS, wc.filesToExtract, wc.handleFile)
} else {
err = internal.WalkDirUnsorted(config.FS, ".", wc.handleFile)
}

log.Infof("End status: %d inodes visited, %d Extract calls, %s elapsed",
wc.inodesVisited, wc.extractCalls, time.Since(start))
Expand All @@ -151,15 +160,16 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}

type walkContext struct {
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
filesToExtract []string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int

// Inventories found.
inventory []*Inventory
Expand All @@ -181,6 +191,21 @@ type walkContext struct {
mapExtracts map[string]int
}

// walkIndividualFiles invokes fn once for each entry in paths, in order,
// instead of traversing fsys. Each path is resolved with fs.Stat: on success
// fn receives a DirEntry derived from the resulting FileInfo, and on failure
// fn receives a nil DirEntry together with the Stat error so that the callback
// decides how to react to it.
//
// The fs.WalkDirFunc control-flow sentinels are honored the same way
// fs.WalkDir honors them for its root: fs.SkipDir only skips the current path
// and fs.SkipAll ends the iteration early. Neither is reported to the caller
// as an error. Any other non-nil error returned by fn stops the iteration and
// is returned unchanged.
func walkIndividualFiles(fsys fs.FS, paths []string, fn fs.WalkDirFunc) error {
	for _, p := range paths {
		// d stays nil when Stat fails, mirroring what fs.WalkDir passes to the
		// callback for a root it could not stat.
		var d fs.DirEntry
		info, err := fs.Stat(fsys, p)
		if err == nil {
			d = fs.FileInfoToDirEntry(info)
		}

		// Sentinels are compared by identity, exactly as fs.WalkDir does.
		switch err = fn(p, d, err); err {
		case nil, fs.SkipDir:
			// There is no directory content to skip here: every listed path
			// is visited on its own, so just move on to the next one.
			continue
		case fs.SkipAll:
			return nil
		default:
			return err
		}
	}
	return nil
}

func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error {
wc.printStatus(path)

Expand Down
36 changes: 29 additions & 7 deletions extractor/extractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ func TestRun(t *testing.T) {
testCases := []struct {
desc string
ex []extractor.InventoryExtractor
filesToExtract []string
dirsToSkip []string
skipDirRegex string
maxInodes int
Expand Down Expand Up @@ -192,6 +193,26 @@ func TestRun(t *testing.T) {
},
wantInodeCount: 6,
},
{
desc: "Extract specific file only",
ex: []extractor.InventoryExtractor{
fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}),
fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}),
},
filesToExtract: []string{path2},
wantInv: []*extractor.Inventory{
&extractor.Inventory{
Name: name2,
Locations: []string{path2},
Extractor: "ex2",
},
},
wantStatus: []*plugin.Status{
&plugin.Status{Name: "ex1", Version: 1, Status: success},
&plugin.Status{Name: "ex2", Version: 2, Status: success},
},
wantInodeCount: 1,
},
{
desc: "nil result",
ex: []extractor.InventoryExtractor{
Expand Down Expand Up @@ -303,13 +324,14 @@ func TestRun(t *testing.T) {
skipDirRegex = regexp.MustCompile(tc.skipDirRegex)
}
config := &extractor.Config{
Extractors: tc.ex,
DirsToSkip: tc.dirsToSkip,
SkipDirRegex: skipDirRegex,
MaxInodes: tc.maxInodes,
ScanRoot: ".",
FS: fsys,
Stats: fc,
Extractors: tc.ex,
FilesToExtract: tc.filesToExtract,
DirsToSkip: tc.dirsToSkip,
SkipDirRegex: skipDirRegex,
MaxInodes: tc.maxInodes,
ScanRoot: ".",
FS: fsys,
Stats: fc,
}
gotInv, gotStatus, err := extractor.RunFS(context.Background(), config)
if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" {
Expand Down
21 changes: 13 additions & 8 deletions scalibr.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@ func New() *Scanner { return &Scanner{} }
type ScanConfig struct {
InventoryExtractors []extractor.InventoryExtractor
Detectors []detector.Detector
ScanRoot string
// The root directory to start all extractions and detections from.
ScanRoot string
// Individual files to extract inventory from. If specified, the extractors will
// only look at these files and not traverse the filesystem.
FilesToExtract []string
// Directories that the file system walk should ignore, relative to the FS root.
// TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs"
DirsToSkip []string
Expand Down Expand Up @@ -87,13 +91,14 @@ func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) {
Findings: []*detector.Finding{},
}
extractorConfig := &extractor.Config{
Stats: config.Stats,
ReadSymlinks: config.ReadSymlinks,
Extractors: config.InventoryExtractors,
DirsToSkip: config.DirsToSkip,
SkipDirRegex: config.SkipDirRegex,
ScanRoot: config.ScanRoot,
MaxInodes: config.MaxInodes,
Stats: config.Stats,
ReadSymlinks: config.ReadSymlinks,
Extractors: config.InventoryExtractors,
FilesToExtract: config.FilesToExtract,
DirsToSkip: config.DirsToSkip,
SkipDirRegex: config.SkipDirRegex,
ScanRoot: config.ScanRoot,
MaxInodes: config.MaxInodes,
}
inventories, extractorStatus, err := extractor.Run(ctx, extractorConfig)
sro.Inventories = inventories
Expand Down

0 comments on commit 853a4bc

Please sign in to comment.