From e6f45cd48f2c436336f3bc70c6b1b565ddff7707 Mon Sep 17 00:00:00 2001 From: DmitriyLewen <91113035+DmitriyLewen@users.noreply.github.com> Date: Mon, 16 Sep 2024 17:23:41 +0600 Subject: [PATCH] refactor: split `.egg` and `packaging` analyzers (#7514) --- pkg/fanal/analyzer/const.go | 9 +- .../analyzer/language/python/packaging/egg.go | 126 +++++++++++++++ .../language/python/packaging/egg_test.go | 146 ++++++++++++++++++ .../language/python/packaging/packaging.go | 111 ++++--------- .../python/packaging/packaging_test.go | 27 ---- .../sample_package.egg | Bin 0 -> 1135 bytes 6 files changed, 311 insertions(+), 108 deletions(-) create mode 100644 pkg/fanal/analyzer/language/python/packaging/egg.go create mode 100644 pkg/fanal/analyzer/language/python/packaging/egg_test.go create mode 100644 pkg/fanal/analyzer/language/python/packaging/testdata/egg-zip-with-license-file/sample_package.egg diff --git a/pkg/fanal/analyzer/const.go b/pkg/fanal/analyzer/const.go index ea2108e89281..197c0033296e 100644 --- a/pkg/fanal/analyzer/const.go +++ b/pkg/fanal/analyzer/const.go @@ -75,10 +75,11 @@ const ( TypeCondaEnv Type = "conda-environment" // Python - TypePythonPkg Type = "python-pkg" - TypePip Type = "pip" - TypePipenv Type = "pipenv" - TypePoetry Type = "poetry" + TypePythonPkg Type = "python-pkg" + TypePythonPkgEgg Type = "python-egg" + TypePip Type = "pip" + TypePipenv Type = "pipenv" + TypePoetry Type = "poetry" // Go TypeGoBinary Type = "gobinary" diff --git a/pkg/fanal/analyzer/language/python/packaging/egg.go b/pkg/fanal/analyzer/language/python/packaging/egg.go new file mode 100644 index 000000000000..1de53980260e --- /dev/null +++ b/pkg/fanal/analyzer/language/python/packaging/egg.go @@ -0,0 +1,126 @@ +package packaging + +import ( + "archive/zip" + "context" + "io" + "os" + "path" + "path/filepath" + + "github.com/samber/lo" + "golang.org/x/xerrors" + + "github.com/aquasecurity/trivy/pkg/dependency/parser/python/packaging" + "github.com/aquasecurity/trivy/pkg/fanal/analyzer" + "github.com/aquasecurity/trivy/pkg/fanal/analyzer/language" + "github.com/aquasecurity/trivy/pkg/fanal/types" + "github.com/aquasecurity/trivy/pkg/log" + xio "github.com/aquasecurity/trivy/pkg/x/io" +) + +func init() { + analyzer.RegisterAnalyzer(&eggAnalyzer{}) +} + +const ( + eggAnalyzerVersion = 1 + eggExt = ".egg" +) + +type eggAnalyzer struct { + logger *log.Logger + licenseClassifierConfidenceLevel float64 +} + +func (a *eggAnalyzer) Init(opt analyzer.AnalyzerOptions) error { + a.logger = log.WithPrefix("python") + a.licenseClassifierConfidenceLevel = opt.LicenseScannerOption.ClassifierConfidenceLevel + return nil +} + +// Analyze analyzes egg archive files +func (a *eggAnalyzer) Analyze(_ context.Context, input analyzer.AnalysisInput) (*analyzer.AnalysisResult, error) { + // .egg file is zip format and PKG-INFO needs to be extracted from the zip file. + pkginfoInZip, err := findFileInZip(input.Content, input.Info.Size(), isEggFile) + if err != nil { + return nil, xerrors.Errorf("unable to open `.egg` archive: %w", err) + } + + // Egg archive may not contain required files, then we will get nil. Skip this archives + if pkginfoInZip == nil { + return nil, nil + } + + rsa, err := xio.NewReadSeekerAt(pkginfoInZip) + if err != nil { + return nil, xerrors.Errorf("unable to convert PKG-INFO reader: %w", err) + } + + app, err := language.ParsePackage(types.PythonPkg, input.FilePath, rsa, packaging.NewParser(), input.Options.FileChecksum) + if err != nil { + return nil, xerrors.Errorf("parse error: %w", err) + } else if app == nil { + return nil, nil + } + + opener := func(licPath string) (io.ReadCloser, error) { + required := func(filePath string) bool { + return path.Base(filePath) == licPath + } + + f, err := findFileInZip(input.Content, input.Info.Size(), required) + if err != nil { + return nil, xerrors.Errorf("unable to find license file in `*.egg` file: %w", err) + } else if f == nil { // zip doesn't contain license file + return nil, nil + } + + return f, nil + } + + if err = fillAdditionalData(opener, app, a.licenseClassifierConfidenceLevel); err != nil { + a.logger.Warn("Unable to collect additional info", log.Err(err)) + } + + return &analyzer.AnalysisResult{ + Applications: []types.Application{*app}, + }, nil +} + +func findFileInZip(r xio.ReadSeekerAt, zipSize int64, required func(filePath string) bool) (io.ReadCloser, error) { + if _, err := r.Seek(0, io.SeekStart); err != nil { + return nil, xerrors.Errorf("file seek error: %w", err) + } + + zr, err := zip.NewReader(r, zipSize) + if err != nil { + return nil, xerrors.Errorf("zip reader error: %w", err) + } + + found, ok := lo.Find(zr.File, func(f *zip.File) bool { + return required(f.Name) + }) + if !ok { + return nil, nil + } + + f, err := found.Open() + if err != nil { + return nil, xerrors.Errorf("unable to open file in zip: %w", err) + } + + return f, nil +} + +func (a *eggAnalyzer) Required(filePath string, _ os.FileInfo) bool { + return filepath.Ext(filePath) == eggExt +} + +func (a *eggAnalyzer) Type() analyzer.Type { + return analyzer.TypePythonPkgEgg +} + +func (a *eggAnalyzer) Version() int { + return eggAnalyzerVersion +} diff --git a/pkg/fanal/analyzer/language/python/packaging/egg_test.go b/pkg/fanal/analyzer/language/python/packaging/egg_test.go new file mode 100644 index 000000000000..615dffb4b0cc --- /dev/null +++ b/pkg/fanal/analyzer/language/python/packaging/egg_test.go @@ -0,0 +1,146 @@ +package packaging + +import ( + "context" + "os" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/aquasecurity/trivy/pkg/fanal/analyzer" + "github.com/aquasecurity/trivy/pkg/fanal/types" +) + +func Test_eggAnalyzer_Analyze(t *testing.T) { + tests := []struct { + name string + inputFile string + includeChecksum bool + want *analyzer.AnalysisResult + wantErr string + }{ + { + name: "egg zip", + inputFile: "testdata/egg-zip/kitchen-1.2.6-py2.7.egg", + want: &analyzer.AnalysisResult{ + Applications: []types.Application{ + { + Type: types.PythonPkg, + FilePath: "testdata/egg-zip/kitchen-1.2.6-py2.7.egg", + Packages: types.Packages{ + { + Name: "kitchen", + Version: "1.2.6", + Licenses: []string{ + "LGPL-2.1-only", + }, + FilePath: "testdata/egg-zip/kitchen-1.2.6-py2.7.egg", + }, + }, + }, + }, + }, + }, + { + name: "egg zip with checksum", + inputFile: "testdata/egg-zip/kitchen-1.2.6-py2.7.egg", + includeChecksum: true, + want: &analyzer.AnalysisResult{ + Applications: []types.Application{ + { + Type: types.PythonPkg, + FilePath: "testdata/egg-zip/kitchen-1.2.6-py2.7.egg", + Packages: types.Packages{ + { + Name: "kitchen", + Version: "1.2.6", + Licenses: []string{ + "LGPL-2.1-only", + }, + FilePath: "testdata/egg-zip/kitchen-1.2.6-py2.7.egg", + Digest: "sha1:4e13b6e379966771e896ee43cf8e240bf6083dca", + }, + }, + }, + }, + }, + }, + { + name: "egg zip with license file", + inputFile: "testdata/egg-zip-with-license-file/sample_package.egg", + want: &analyzer.AnalysisResult{ + Applications: []types.Application{ + { + Type: types.PythonPkg, + FilePath: "testdata/egg-zip-with-license-file/sample_package.egg", + Packages: types.Packages{ + { + Name: "sample_package", + Version: "0.1", + Licenses: []string{ + "MIT", + }, + FilePath: "testdata/egg-zip-with-license-file/sample_package.egg", + }, + }, + }, + }, + }, + }, + { + name: "egg zip doesn't contain required files", + inputFile: "testdata/no-req-files/no-required-files.egg", + want: nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f, err := os.Open(tt.inputFile) + require.NoError(t, err) + defer f.Close() + fileInfo, err := os.Lstat(tt.inputFile) + require.NoError(t, err) + + a := &eggAnalyzer{} + got, err := a.Analyze(context.Background(), analyzer.AnalysisInput{ + Content: f, + FilePath: tt.inputFile, + Info: fileInfo, + Options: analyzer.AnalysisOptions{ + FileChecksum: tt.includeChecksum, + }, + }) + + require.NoError(t, err) + assert.Equal(t, tt.want, got) + }) + } + +} + +func Test_eggAnalyzer_Required(t *testing.T) { + tests := []struct { + name string + filePath string + want bool + }{ + { + name: "egg zip", + filePath: "python2.7/site-packages/cssutils-1.0-py2.7.egg", + want: true, + }, + { + name: "egg-info PKG-INFO", + filePath: "python3.8/site-packages/wrapt-1.12.1.egg-info/PKG-INFO", + want: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + a := eggAnalyzer{} + got := a.Required(tt.filePath, nil) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/pkg/fanal/analyzer/language/python/packaging/packaging.go b/pkg/fanal/analyzer/language/python/packaging/packaging.go index 73e5f446bc40..944a5abde331 100644 --- a/pkg/fanal/analyzer/language/python/packaging/packaging.go +++ b/pkg/fanal/analyzer/language/python/packaging/packaging.go @@ -1,8 +1,6 @@ package packaging import ( - "archive/zip" - "bytes" "context" "errors" "io" @@ -29,7 +27,7 @@ func init() { analyzer.RegisterPostAnalyzer(analyzer.TypePythonPkg, newPackagingAnalyzer) } -const version = 1 +const version = 2 func newPackagingAnalyzer(opt analyzer.AnalyzerOptions) (analyzer.PostAnalyzer, error) { return &packagingAnalyzer{ @@ -43,7 +41,7 @@ var ( eggFiles = []string{ // .egg format // https://setuptools.readthedocs.io/en/latest/deprecated/python_eggs.html#eggs-and-their-formats - ".egg", // zip format + // ".egg" is zip format. We check it in `eggAnalyzer`. "EGG-INFO/PKG-INFO", // .egg-info format: .egg-info can be a file or directory @@ -68,38 +66,32 @@ func (a packagingAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAna return filepath.Base(path) == "METADATA" || isEggFile(path) } - err := fsutils.WalkDir(input.FS, ".", required, func(path string, d fs.DirEntry, r io.Reader) error { + err := fsutils.WalkDir(input.FS, ".", required, func(filePath string, d fs.DirEntry, r io.Reader) error { rsa, ok := r.(xio.ReadSeekerAt) if !ok { return xerrors.New("invalid reader") } - // .egg file is zip format and PKG-INFO needs to be extracted from the zip file. - if strings.HasSuffix(path, ".egg") { - info, err := d.Info() - if err != nil { - return xerrors.Errorf("egg file error: %w", err) - } - pkginfoInZip, err := a.analyzeEggZip(rsa, info.Size()) - if err != nil { - return xerrors.Errorf("egg analysis error: %w", err) - } - - // Egg archive may not contain required files, then we will get nil. Skip this archives - if pkginfoInZip == nil { - return nil - } - rsa = pkginfoInZip - } - - app, err := a.parse(path, rsa, input.Options.FileChecksum) + app, err := a.parse(filePath, rsa, input.Options.FileChecksum) if err != nil { return xerrors.Errorf("parse error: %w", err) } else if app == nil { return nil } - if err := a.fillAdditionalData(input.FS, app); err != nil { + opener := func(licPath string) (io.ReadCloser, error) { + // Note that fs.FS is always slashed regardless of the platform, + // and path.Join should be used rather than filepath.Join. + f, err := input.FS.Open(path.Join(path.Dir(filePath), licPath)) + if errors.Is(err, fs.ErrNotExist) { + return nil, nil + } else if err != nil { + return nil, xerrors.Errorf("file open error: %w", err) + } + return f, nil + } + + if err = fillAdditionalData(opener, app, a.licenseClassifierConfidenceLevel); err != nil { a.logger.Warn("Unable to collect additional info", log.Err(err)) } @@ -115,7 +107,9 @@ func (a packagingAnalyzer) PostAnalyze(_ context.Context, input analyzer.PostAna }, nil } -func (a packagingAnalyzer) fillAdditionalData(fsys fs.FS, app *types.Application) error { +type fileOpener func(filePath string) (io.ReadCloser, error) + +func fillAdditionalData(opener fileOpener, app *types.Application, licenseClassifierConfidenceLevel float64) error { for i, pkg := range app.Packages { var licenses []string for _, lic := range pkg.Licenses { @@ -126,19 +120,12 @@ func (a packagingAnalyzer) fillAdditionalData(fsys fs.FS, app *types.Application licenses = append(licenses, lic) continue } - licenseFilePath := path.Base(strings.TrimPrefix(lic, licensing.LicenseFilePrefix)) + licensePath := path.Base(strings.TrimPrefix(lic, licensing.LicenseFilePrefix)) - findings, err := classifyLicense(app.FilePath, licenseFilePath, a.licenseClassifierConfidenceLevel, fsys) + foundLicenses, err := classifyLicenses(opener, licensePath, licenseClassifierConfidenceLevel) if err != nil { - return err - } else if len(findings) == 0 { - continue + return xerrors.Errorf("unable to classify licenses: %w", err) } - - // License found - foundLicenses := lo.Map(findings, func(finding types.LicenseFinding, _ int) string { - return finding.Name - }) licenses = append(licenses, foundLicenses...) } app.Packages[i].Licenses = licenses @@ -147,62 +134,32 @@ func (a packagingAnalyzer) fillAdditionalData(fsys fs.FS, app *types.Application return nil } -func classifyLicense(dir, licPath string, classifierConfidenceLevel float64, fsys fs.FS) (types.LicenseFindings, error) { - // Note that fs.FS is always slashed regardless of the platform, - // and path.Join should be used rather than filepath.Join. - f, err := fsys.Open(path.Join(path.Dir(dir), licPath)) - if errors.Is(err, fs.ErrNotExist) { +func classifyLicenses(opener fileOpener, licPath string, licenseClassifierConfidenceLevel float64) ([]string, error) { + f, err := opener(licPath) + if err != nil { + return nil, xerrors.Errorf("unable to open license file: %w", err) + } else if f == nil { // File doesn't exist return nil, nil - } else if err != nil { - return nil, xerrors.Errorf("file open error: %w", err) } defer f.Close() - l, err := licensing.Classify(licPath, f, classifierConfidenceLevel) + l, err := licensing.Classify("", f, licenseClassifierConfidenceLevel) if err != nil { return nil, xerrors.Errorf("license classify error: %w", err) - } else if l == nil { + } else if l == nil { // No licenses found return nil, nil } - return l.Findings, nil + // License found + return lo.Map(l.Findings, func(finding types.LicenseFinding, _ int) string { + return finding.Name + }), nil } func (a packagingAnalyzer) parse(filePath string, r xio.ReadSeekerAt, checksum bool) (*types.Application, error) { return language.ParsePackage(types.PythonPkg, filePath, r, a.pkgParser, checksum) } -func (a packagingAnalyzer) analyzeEggZip(r io.ReaderAt, size int64) (xio.ReadSeekerAt, error) { - zr, err := zip.NewReader(r, size) - if err != nil { - return nil, xerrors.Errorf("zip reader error: %w", err) - } - - found, ok := lo.Find(zr.File, func(f *zip.File) bool { - return isEggFile(f.Name) - }) - if !ok { - return nil, nil - } - return a.open(found) -} - -// open reads the file content in the zip archive to make it seekable. -func (a packagingAnalyzer) open(file *zip.File) (xio.ReadSeekerAt, error) { - f, err := file.Open() - if err != nil { - return nil, err - } - defer f.Close() - - b, err := io.ReadAll(f) - if err != nil { - return nil, xerrors.Errorf("file %s open error: %w", file.Name, err) - } - - return bytes.NewReader(b), nil -} - func (a packagingAnalyzer) Required(filePath string, _ os.FileInfo) bool { return strings.Contains(filePath, ".dist-info") || isEggFile(filePath) } diff --git a/pkg/fanal/analyzer/language/python/packaging/packaging_test.go b/pkg/fanal/analyzer/language/python/packaging/packaging_test.go index 960a92dfec2b..47a9cca901b6 100644 --- a/pkg/fanal/analyzer/language/python/packaging/packaging_test.go +++ b/pkg/fanal/analyzer/language/python/packaging/packaging_test.go @@ -20,28 +20,6 @@ func Test_packagingAnalyzer_Analyze(t *testing.T) { want *analyzer.AnalysisResult wantErr string }{ - { - name: "egg zip", - dir: "testdata/egg-zip", - want: &analyzer.AnalysisResult{ - Applications: []types.Application{ - { - Type: types.PythonPkg, - FilePath: "kitchen-1.2.6-py2.7.egg", - Packages: types.Packages{ - { - Name: "kitchen", - Version: "1.2.6", - Licenses: []string{ - "LGPL-2.1-only", - }, - FilePath: "kitchen-1.2.6-py2.7.egg", - }, - }, - }, - }, - }, - }, { name: "egg-info", dir: "testdata/happy-egg", @@ -124,11 +102,6 @@ func Test_packagingAnalyzer_Analyze(t *testing.T) { }, }, }, - { - name: "egg zip doesn't contain required files", - dir: "testdata/no-req-files", - want: &analyzer.AnalysisResult{}, - }, { name: "license file in dist.info", dir: "testdata/license-file-dist", diff --git a/pkg/fanal/analyzer/language/python/packaging/testdata/egg-zip-with-license-file/sample_package.egg b/pkg/fanal/analyzer/language/python/packaging/testdata/egg-zip-with-license-file/sample_package.egg new file mode 100644 index 0000000000000000000000000000000000000000..91d67dc5947b71b0d541924d68430e56d2aaa360 GIT binary patch literal 1135 zcmWIWW@Zs#VBlb2C`?t3WIzI(K(?#9yRN67o4YrYdt0HtLZ85jgn zl|j_|_MOdRGURD_U&*!KVN=0bPM7JrS}ju-yxq8|X}7fLO_$Dz58g^IeB1w|+4gg# z3478Wg~c_CV`Ao*=v;M@;90UT!DF7C&@Fu#k3T}(TSb;XlTK#0Giy&o&U)_dL9( zC`)zA-tW&?(l>HRNTy$w;hgcH>jB4Xx8LurA1}Ro*K6X1J^Q8ybMWrWJ^SQrTJX%D zX?wTlF}+SS*t~Pkq0+o%3A-QsK<@5^-1(RlBnyJzJ1@2HxTZMrNfm2vm) zRI|ek2KT-`ym*Ffx^>2$^0m1F{8rcX%#B;s1mg4EA3yY#;K_OYHR-qMq>^Ql2mKs2 z?^mw4VX#B>Ku*n!TyF_Zy{(TgO>Eo$_~N}cryQPd-phAT_{F1(j4yp2JhQre`MOK= z_oqft_9_=5oA&82&Ym~RaA`|TO|fx>abHV9VELC{>-63UoSw_Xo9#03yWx)8>FN@J z`_Bjm&8oO;*`s6h&nR~R-`ekaGIyKTY|VS6WBKX6xpeR1XOHS;zfir^e3Es_FSSOw z2mk)9dVPYQH*5b4Q-dc*D`xYTr7il-D1R^MTe-RWzW=#)iuK19PGhf4>S|uGFzA!( z5}Q`t)f?}r6$n{PcIVx(FkFuPkc4s7mzf+tkvqN0yf-oV0m*<4$47u`