Skip to content

Commit

Permalink
Add scan config option to extract individual files.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 628062325
  • Loading branch information
erikvarga authored and copybara-github committed Apr 29, 2024
1 parent 2cc083f commit 54adce6
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 42 deletions.
2 changes: 1 addition & 1 deletion binary/scalibr.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func main() {

func parseFlags() *cli.Flags {
// TODO(b/279138598): Make this OS-agnostic, e.g. don't use the unix-specific root path.
root := flag.String("root", "/", `The root directory to start all extractions/detections from (e.g.: "/", "c:\" or ".")`)
root := flag.String("root", "/", `The root dir used by detectors and by file walking during extraction (e.g.: "/", "c:\" or ".")`)
resultFile := flag.String("result", "", "The path of the output scan result file")
var output cli.Array
flag.Var(&output, "o", "The path of the scanner outputs in various formats, e.g. -o textproto=result.textproto -o spdx23-json=result.spdx.json")
Expand Down
80 changes: 56 additions & 24 deletions extractor/extractor.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ type InventoryExtractor interface {

// ScanInput describes one file to extract from.
type ScanInput struct {
// The path of the file to extract, relative to ScanRoot.
Path string
// The root directory to start all extractions from.
// The root directory from which the extraction file walk started.
ScanRoot string
Info fs.FileInfo
// A reader for accessing contents of the file.
Expand All @@ -67,11 +68,17 @@ type Config struct {
Extractors []InventoryExtractor
ScanRoot string
FS fs.FS
// Directories that the file system walk should ignore. Note that this is not
// relative to ScanRoot and thus needs to be a sub-directory of ScanRoot.
// Optional: Individual files to extract inventory from. If specified, the
// extractors will only look at these files during the filesystem traversal.
// Note that these are not relative to ScanRoot and thus need to be in
// sub-directories of ScanRoot.
FilesToExtract []string
// Optional: Directories that the file system walk should ignore.
// Note that these are not relative to ScanRoot and thus need to be
// sub-directories of ScanRoot.
// TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs"
DirsToSkip []string
// If the regex matches a directory, it will be skipped.
// Optional: If the regex matches a directory, it will be skipped.
SkipDirRegex *regexp.Regexp
// Optional: Stats allows callers to supply a metrics hook. If left nil, no metrics will be recorded.
Stats stats.Collector
Expand Down Expand Up @@ -125,21 +132,26 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
if err != nil {
return nil, nil, err
}
filesToExtract, err := stripPathPrefix(config.FilesToExtract, scanRoot)
if err != nil {
return nil, nil, err
}
dirsToSkip, err := stripPathPrefix(config.DirsToSkip, scanRoot)
if err != nil {
return nil, nil, err
}
wc := walkContext{
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: scanRoot,
dirsToSkip: stringListToMap(dirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
inodesVisited: 0,
ctx: ctx,
stats: config.Stats,
extractors: config.Extractors,
fs: config.FS,
scanRoot: scanRoot,
filesToExtract: filesToExtract,
dirsToSkip: stringListToMap(dirsToSkip),
skipDirRegex: config.SkipDirRegex,
readSymlinks: config.ReadSymlinks,
maxInodes: config.MaxInodes,
inodesVisited: 0,

lastStatus: time.Now(),

Expand All @@ -151,7 +163,11 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
mapExtracts: make(map[string]int),
}

err = internal.WalkDirUnsorted(config.FS, ".", wc.handleFile)
if len(wc.filesToExtract) > 0 {
err = walkIndividualFiles(config.FS, wc.filesToExtract, wc.handleFile)
} else {
err = internal.WalkDirUnsorted(config.FS, ".", wc.handleFile)
}

log.Infof("End status: %d inodes visited, %d Extract calls, %s elapsed",
wc.inodesVisited, wc.extractCalls, time.Since(start))
Expand All @@ -161,15 +177,16 @@ func RunFS(ctx context.Context, config *Config) ([]*Inventory, []*plugin.Status,
}

type walkContext struct {
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int
ctx context.Context
stats stats.Collector
extractors []InventoryExtractor
fs fs.FS
scanRoot string
filesToExtract []string
dirsToSkip map[string]bool // Anything under these paths should be skipped.
skipDirRegex *regexp.Regexp
maxInodes int
inodesVisited int

// Inventories found.
inventory []*Inventory
Expand All @@ -191,6 +208,21 @@ type walkContext struct {
mapExtracts map[string]int
}

// walkIndividualFiles invokes fn once for every entry of paths, in order,
// instead of traversing a directory tree.
//
// Each path is resolved with fs.Stat against fsys. On success fn receives a
// DirEntry built from the returned FileInfo; on failure fn receives a nil
// DirEntry together with the Stat error, so the callback decides whether the
// failure is fatal.
//
// fn's return value is interpreted per the fs.WalkDirFunc contract, treating
// every listed path like the root of its own walk:
//   - fs.SkipDir skips only the current path and the walk continues,
//   - fs.SkipAll stops the walk early and nil is returned,
//   - any other non-nil error aborts the walk and is returned unchanged.
func walkIndividualFiles(fsys fs.FS, paths []string, fn fs.WalkDirFunc) error {
	for _, p := range paths {
		var err error
		if info, statErr := fs.Stat(fsys, p); statErr != nil {
			err = fn(p, nil, statErr)
		} else {
			err = fn(p, fs.FileInfoToDirEntry(info), nil)
		}

		// Sentinels are compared by identity, the same way fs.WalkDir does.
		switch err {
		case nil, fs.SkipDir:
			// SkipDir is a control signal, not a failure: nothing below this
			// path is visited here anyway, so just move on to the next one.
			continue
		case fs.SkipAll:
			return nil
		default:
			return err
		}
	}
	return nil
}

func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error {
wc.printStatus(path)

Expand Down
67 changes: 60 additions & 7 deletions extractor/extractor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ func TestRun(t *testing.T) {
testCases := []struct {
desc string
ex []extractor.InventoryExtractor
filesToExtract []string
dirsToSkip []string
skipDirRegex string
maxInodes int
Expand Down Expand Up @@ -231,6 +232,57 @@ func TestRun(t *testing.T) {
},
wantInodeCount: 6,
},
{
desc: "Extract specific file",
ex: []extractor.InventoryExtractor{
fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}),
fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}),
},
filesToExtract: []string{path2},
wantInv: []*extractor.Inventory{
&extractor.Inventory{
Name: name2,
Locations: []string{path2},
Extractor: "ex2",
},
},
wantStatus: []*plugin.Status{
&plugin.Status{Name: "ex1", Version: 1, Status: success},
&plugin.Status{Name: "ex2", Version: 2, Status: success},
},
wantInodeCount: 1,
},
{
desc: "Extract specific file with absolute path",
ex: []extractor.InventoryExtractor{
fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}),
fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}),
},
// ScanRoot is CWD
filesToExtract: []string{path.Join(cwd, path2)},
wantInv: []*extractor.Inventory{
&extractor.Inventory{
Name: name2,
Locations: []string{path2},
Extractor: "ex2",
},
},
wantStatus: []*plugin.Status{
&plugin.Status{Name: "ex1", Version: 1, Status: success},
&plugin.Status{Name: "ex2", Version: 2, Status: success},
},
wantInodeCount: 1,
},
{
desc: "Extract specific file not relative to ScanRoot",
ex: []extractor.InventoryExtractor{
fe.New("ex1", 1, []string{path1}, map[string]fe.NamesErr{path1: {Names: []string{name1}, Err: nil}}),
fe.New("ex2", 2, []string{path2}, map[string]fe.NamesErr{path2: {Names: []string{name2}, Err: nil}}),
},
// ScanRoot is CWD, filepath is in its parent dir.
filesToExtract: []string{path.Join(filepath.Dir(cwd), path2)},
wantErr: cmpopts.AnyError,
},
{
desc: "nil result",
ex: []extractor.InventoryExtractor{
Expand Down Expand Up @@ -342,13 +394,14 @@ func TestRun(t *testing.T) {
skipDirRegex = regexp.MustCompile(tc.skipDirRegex)
}
config := &extractor.Config{
Extractors: tc.ex,
DirsToSkip: tc.dirsToSkip,
SkipDirRegex: skipDirRegex,
MaxInodes: tc.maxInodes,
ScanRoot: ".",
FS: fsys,
Stats: fc,
Extractors: tc.ex,
FilesToExtract: tc.filesToExtract,
DirsToSkip: tc.dirsToSkip,
SkipDirRegex: skipDirRegex,
MaxInodes: tc.maxInodes,
ScanRoot: ".",
FS: fsys,
Stats: fc,
}
gotInv, gotStatus, err := extractor.RunFS(context.Background(), config)
if diff := cmp.Diff(tc.wantErr, err, cmpopts.EquateErrors()); diff != "" {
Expand Down
31 changes: 21 additions & 10 deletions scalibr.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,19 @@ func New() *Scanner { return &Scanner{} }
type ScanConfig struct {
InventoryExtractors []extractor.InventoryExtractor
Detectors []detector.Detector
ScanRoot string
// Directories that the file system walk should ignore. Note that this is not
// relative to ScanRoot and thus needs to be a sub-directory of ScanRoot.
// ScanRoot is the root dir used by file walking during extraction.
// All extractors and detectors will assume files are relative to this dir.
// Example use case: Scanning a container image or source code repo that is
// mounted to a local dir.
ScanRoot string
// Optional: Individual files to extract inventory from. If specified, the
// extractors will only look at these files during the filesystem traversal.
// Note that these are not relative to ScanRoot and thus need to be in
// sub-directories of ScanRoot.
FilesToExtract []string
// Optional: Directories that the file system walk should ignore.
// Note that these are not relative to ScanRoot and thus need to be
// sub-directories of ScanRoot.
// TODO(b/279413691): Also skip local paths, e.g. "Skip all .git dirs"
DirsToSkip []string
// Optional: If the regex matches a directory, it will be skipped.
Expand Down Expand Up @@ -88,13 +98,14 @@ func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) {
Findings: []*detector.Finding{},
}
extractorConfig := &extractor.Config{
Stats: config.Stats,
ReadSymlinks: config.ReadSymlinks,
Extractors: config.InventoryExtractors,
DirsToSkip: config.DirsToSkip,
SkipDirRegex: config.SkipDirRegex,
ScanRoot: config.ScanRoot,
MaxInodes: config.MaxInodes,
Stats: config.Stats,
ReadSymlinks: config.ReadSymlinks,
Extractors: config.InventoryExtractors,
FilesToExtract: config.FilesToExtract,
DirsToSkip: config.DirsToSkip,
SkipDirRegex: config.SkipDirRegex,
ScanRoot: config.ScanRoot,
MaxInodes: config.MaxInodes,
}
inventories, extractorStatus, err := extractor.Run(ctx, extractorConfig)
sro.Inventories = inventories
Expand Down

0 comments on commit 54adce6

Please sign in to comment.