diff --git a/extractor/filesystem/filesystem.go b/extractor/filesystem/filesystem.go
index dc07ece4..bb564eec 100644
--- a/extractor/filesystem/filesystem.go
+++ b/extractor/filesystem/filesystem.go
@@ -17,6 +17,7 @@ package filesystem
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"io"
 	"io/fs"
@@ -38,7 +39,10 @@ import (
 var (
 	// ErrNotRelativeToScanRoots is returned when one of the file or directory to be retrieved or
 	// skipped is not relative to any of the scan roots.
-	ErrNotRelativeToScanRoots = fmt.Errorf("path not relative to any of the scan roots")
+	ErrNotRelativeToScanRoots = errors.New("path not relative to any of the scan roots")
+	// ErrSkipFile is returned by the custom callback function to indicate that the current file
+	// should not be parsed by any extractors.
+	ErrSkipFile = errors.New("skip this file")
 )
 
 // Extractor is the filesystem-based inventory extraction plugin, used to extract inventory data
@@ -95,6 +99,12 @@ type Config struct {
 	SkipDirGlob glob.Glob
 	// Optional: stats allows to enter a metric hook. If left nil, no metrics will be recorded.
 	Stats stats.Collector
+	// Optional: A callback to run on every file and directory before extraction.
+	// The following error returns have special meaning:
+	// filesystem.ErrSkipFile - Skip the current file, i.e. don't run any extraction on it.
+	// fs.SkipDir - Skip the current file and remaining files in the current directory.
+	// fs.SkipAll - Skip the current file and all remaining files on the filesystem.
+	BeforeExtraction BeforeExtractionCallback
 	// Optional: Whether to read symlinks.
 	ReadSymlinks bool
 	// Optional: Limit for visited inodes. If 0, no limit is applied.
@@ -106,6 +116,9 @@ type Config struct {
 	PrintDurationAnalysis bool
 }
 
+// BeforeExtractionCallback is the function type called on each file and directory before extraction.
+type BeforeExtractionCallback func(path string) error
+
 // Run runs the specified extractors and returns their extraction results,
 // as well as info about whether the plugin runs completed successfully.
 func Run(ctx context.Context, config *Config) ([]*extractor.Inventory, []*plugin.Status, error) {
@@ -171,6 +184,7 @@ func InitWalkContext(ctx context.Context, config *Config, absScanRoots []*scalib
 	return &walkContext{
 		ctx: ctx,
 		stats: config.Stats,
+		beforeExtraction: config.BeforeExtraction,
 		extractors: config.Extractors,
 		filesToExtract: filesToExtract,
 		dirsToSkip: pathStringListToMap(dirsToSkip),
@@ -236,6 +250,7 @@ func RunFS(ctx context.Context, config *Config, wc *walkContext) ([]*extractor.I
 type walkContext struct {
 	ctx context.Context
 	stats stats.Collector
+	beforeExtraction BeforeExtractionCallback
 	extractors []Extractor
 	fs scalibrfs.FS
 	scanRoot string
@@ -303,8 +318,24 @@ func (wc *walkContext) handleFile(path string, d fs.DirEntry, fserr error) error
 		}
 		return nil
 	}
+
 	if d.Type().IsDir() {
 		wc.dirsVisited++
+	}
+
+	if wc.beforeExtraction != nil {
+		if err := wc.beforeExtraction(path); err != nil {
+			// Skip parsing this file; errors.Is also matches a wrapped ErrSkipFile.
+			if errors.Is(err, ErrSkipFile) {
+				return nil
+			}
+			// The callback can return fs.SkipDir or fs.SkipAll to instruct SCALIBR to
+			// skip the remaining dir or all remaining files.
+			return err
+		}
+	}
+
+	if d.Type().IsDir() {
 		if wc.shouldSkipDir(path) { // Skip everything inside this dir.
 			return fs.SkipDir
 		}
diff --git a/scalibr.go b/scalibr.go
index 5c64e03f..0143e428 100644
--- a/scalibr.go
+++ b/scalibr.go
@@ -84,6 +84,12 @@ type ScanConfig struct {
 	SkipDirGlob glob.Glob
 	// Optional: stats allows to enter a metric hook. If left nil, no metrics will be recorded.
 	Stats stats.Collector
+	// Optional: A callback to run on every file and directory before extraction.
+	// The following error returns have special meaning:
+	// filesystem.ErrSkipFile - Skip the current file, i.e. don't run any extraction on it.
+	// fs.SkipDir - Skip the current file and remaining files in the current directory.
+	// fs.SkipAll - Skip the current file and all remaining files on the filesystem.
+	BeforeExtraction filesystem.BeforeExtractionCallback
 	// Optional: Whether to read symlinks.
 	ReadSymlinks bool
 	// Optional: Limit for visited inodes. If 0, no limit is applied.
@@ -194,6 +200,7 @@ func (Scanner) Scan(ctx context.Context, config *ScanConfig) (sr *ScanResult) {
 	}
 	extractorConfig := &filesystem.Config{
 		Stats: config.Stats,
+		BeforeExtraction: config.BeforeExtraction,
 		ReadSymlinks: config.ReadSymlinks,
 		Extractors: config.FilesystemExtractors,
 		FilesToExtract: config.FilesToExtract,