diff --git a/libindex/libindex.go b/libindex/libindex.go
index b9e8874a1..93bae0e64 100644
--- a/libindex/libindex.go
+++ b/libindex/libindex.go
@@ -21,6 +21,7 @@ import (
 	"github.com/quay/claircore/indexer"
 	"github.com/quay/claircore/indexer/controller"
 	"github.com/quay/claircore/java"
+	"github.com/quay/claircore/nodejs"
 	"github.com/quay/claircore/pkg/omnimatcher"
 	"github.com/quay/claircore/python"
 	"github.com/quay/claircore/rhel"
@@ -103,6 +104,7 @@ func New(ctx context.Context, opts *Options, cl *http.Client) (*Libindex, error)
 			rhcc.NewEcosystem(ctx),
 			gobin.NewEcosystem(ctx),
 			ruby.NewEcosystem(ctx),
+			nodejs.NewEcosystem(ctx),
 		}
 	}
 	// Add whiteout objects
diff --git a/nodejs/coalescer.go b/nodejs/coalescer.go
new file mode 100644
index 000000000..469cc7446
--- /dev/null
+++ b/nodejs/coalescer.go
@@ -0,0 +1,60 @@
+package nodejs
+
+import (
+	"context"
+
+	"github.com/quay/claircore"
+	"github.com/quay/claircore/indexer"
+)
+
+var _ indexer.Coalescer = (*coalescer)(nil)
+
+// Coalescer is the nodejs implementation of [indexer.Coalescer].
+//
+// It is stateless; the zero value is ready to use.
+type coalescer struct{}
+
+// NewCoalescer returns a Coalescer for the nodejs ecosystem.
+//
+// The Context is unused and the returned error is always nil.
+func NewCoalescer(_ context.Context) (indexer.Coalescer, error) {
+	return &coalescer{}, nil
+}
+
+// Coalesce implements [indexer.Coalescer].
+//
+// Layers that reported no repositories are skipped. For every other layer,
+// each package is recorded along with one environment naming that layer and
+// all of the layer's repositories. A package ID seen again in a later layer
+// replaces the earlier entries.
+func (c *coalescer) Coalesce(_ context.Context, ls []*indexer.LayerArtifacts) (*claircore.IndexReport, error) {
+	ir := &claircore.IndexReport{
+		Environments: map[string][]*claircore.Environment{},
+		Packages:     map[string]*claircore.Package{},
+		Repositories: map[string]*claircore.Repository{},
+	}
+
+	for _, l := range ls {
+		// If we didn't find at least one npm repo in this layer
+		// no point in searching for packages.
+		if len(l.Repos) == 0 {
+			continue
+		}
+		rs := make([]string, len(l.Repos))
+		for i, r := range l.Repos {
+			rs[i] = r.ID
+			ir.Repositories[r.ID] = r
+		}
+		for _, pkg := range l.Pkgs {
+			ir.Packages[pkg.ID] = pkg
+			ir.Environments[pkg.ID] = []*claircore.Environment{
+				{
+					PackageDB:     pkg.PackageDB,
+					IntroducedIn:  l.Hash,
+					RepositoryIDs: rs,
+				},
+			}
+		}
+	}
+	return ir, nil
+}
diff --git a/nodejs/ecosystem.go b/nodejs/ecosystem.go
new file mode 100644
index 000000000..de6f84c83
--- /dev/null
+++ b/nodejs/ecosystem.go
@@ -0,0 +1,23 @@
+package nodejs
+
+import (
+	"context"
+
+	"github.com/quay/claircore/indexer"
+)
+
+// Scanners is the package scanner set handed out by NewEcosystem.
+var scanners = []indexer.PackageScanner{&Scanner{}}
+
+// NewEcosystem provides the set of scanners for the nodejs ecosystem.
+//
+// Only package scanners and a coalescer are provided; the distribution and
+// repository scanner sets are nil.
+func NewEcosystem(_ context.Context) *indexer.Ecosystem {
+	return &indexer.Ecosystem{
+		PackageScanners:      func(_ context.Context) ([]indexer.PackageScanner, error) { return scanners, nil },
+		DistributionScanners: func(_ context.Context) ([]indexer.DistributionScanner, error) { return nil, nil },
+		RepositoryScanners:   func(_ context.Context) ([]indexer.RepositoryScanner, error) { return nil, nil },
+		Coalescer:            NewCoalescer,
+	}
+}
diff --git a/nodejs/packagescanner.go b/nodejs/packagescanner.go
new file mode 100644
index 000000000..959a9539a
--- /dev/null
+++ b/nodejs/packagescanner.go
@@ -0,0 +1,154 @@
+// Package nodejs contains components for interrogating nodejs packages in
+// container layers.
+package nodejs
+
+import (
+	"bufio"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io/fs"
+	"path/filepath"
+	"runtime/trace"
+	"strings"
+
+	"github.com/quay/zlog"
+
+	"github.com/quay/claircore"
+	"github.com/quay/claircore/indexer"
+)
+
+const repository = "npm"
+
+var (
+	_ indexer.VersionedScanner   = (*Scanner)(nil)
+	_ indexer.PackageScanner     = (*Scanner)(nil)
+	_ indexer.DefaultRepoScanner = (*Scanner)(nil)
+
+	Repository = claircore.Repository{
+		Name: repository,
+		URI:  "https://www.npmjs.com/",
+	}
+)
+
+// Scanner implements the [indexer.PackageScanner] interface.
+//
+// It looks for files that seem like package.json and looks at the
+// metadata recorded there.
+//
+// The zero value is ready to use.
+type Scanner struct{}
+
+// Name implements [indexer.VersionedScanner].
+func (*Scanner) Name() string { return "nodejs" }
+
+// Version implements [indexer.VersionedScanner].
+func (*Scanner) Version() string { return "1" }
+
+// Kind implements [indexer.VersionedScanner].
+func (*Scanner) Kind() string { return "package" }
+
+// packageJSON represents the fields of a package.json file
+// useful for package scanning.
+//
+// See https://docs.npmjs.com/files/package.json/ for more details
+// about the format of package.json files.
+type packageJSON struct {
+	Name    string `json:"name"`
+	Version string `json:"version"`
+}
+
+// Scan attempts to find package.json files and record the package
+// information there.
+//
+// A return of (nil, nil) is expected if there's nothing found.
+func (s *Scanner) Scan(ctx context.Context, layer *claircore.Layer) ([]*claircore.Package, error) {
+	defer trace.StartRegion(ctx, "Scanner.Scan").End()
+	trace.Log(ctx, "layer", layer.Hash.String())
+	ctx = zlog.ContextWithValues(ctx,
+		"component", "nodejs/Scanner.Scan",
+		"version", s.Version(),
+		"layer", layer.Hash.String())
+	zlog.Debug(ctx).Msg("start")
+	defer zlog.Debug(ctx).Msg("done")
+	if err := ctx.Err(); err != nil {
+		return nil, err
+	}
+	sys, err := layer.FS()
+	if err != nil {
+		return nil, fmt.Errorf("nodejs: unable to open layer: %w", err)
+	}
+	pkgs, err := packages(ctx, sys)
+	if err != nil {
+		return nil, fmt.Errorf("nodejs: failed to find packages: %w", err)
+	}
+	if len(pkgs) == 0 {
+		return nil, nil
+	}
+
+	ret := make([]*claircore.Package, 0, len(pkgs))
+	for _, p := range pkgs {
+		f, err := sys.Open(p)
+		if err != nil {
+			return nil, fmt.Errorf("nodejs: unable to open file: %w", err)
+		}
+		var pkgJSON packageJSON
+		err = json.NewDecoder(bufio.NewReader(f)).Decode(&pkgJSON)
+		// Close per iteration instead of deferring, so files are not held open until Scan returns.
+		f.Close()
+		if err != nil {
+			return nil, fmt.Errorf("nodejs: unable to decode package.json file: %w", err)
+		}
+		ret = append(ret, &claircore.Package{
+			Name:           pkgJSON.Name,
+			Version:        pkgJSON.Version,
+			Kind:           claircore.BINARY,
+			PackageDB:      "nodejs:" + p,
+			RepositoryHint: repository,
+		})
+	}
+	return ret, nil
+}
+
+// Packages walks sys and reports every regular, non-whiteout file whose path
+// ends in "/package.json" and contains "node_modules" exactly once.
+func packages(ctx context.Context, sys fs.FS) (out []string, err error) {
+	// Run the walk before reading out: Go leaves the order of "out" and the call unspecified in "return out, f()".
+	err = fs.WalkDir(sys, ".", func(p string, d fs.DirEntry, err error) error {
+		ev := zlog.Debug(ctx).Str("file", p)
+		var success bool
+		defer func() {
+			if !success {
+				ev.Discard().Send()
+			}
+		}()
+		switch {
+		case err != nil:
+			return err
+		case !d.Type().IsRegular():
+			// Should we chase symlinks with the correct name?
+			return nil
+		case strings.HasPrefix(filepath.Base(p), ".wh."):
+			return nil
+		case strings.Count(p, `node_modules`) != 1:
+			// Only bother with package.json files within the top-level node_modules/ directory.
+			// See https://docs.npmjs.com/cli/v7/configuring-npm/folders#node-modules
+			// for more information.
+			return nil
+		case strings.HasSuffix(p, "/package.json"):
+			ev = ev.Str("kind", "package.json")
+		default:
+			return nil
+		}
+		ev.Msg("found package")
+		success = true
+		out = append(out, p)
+		return nil
+	})
+	return out, err
+}
+
+// DefaultRepository implements [indexer.DefaultRepoScanner].
+func (*Scanner) DefaultRepository(_ context.Context) *claircore.Repository {
+	return &Repository
+}
diff --git a/nodejs/packagescanner_test.go b/nodejs/packagescanner_test.go
new file mode 100644
index 000000000..479f496bb
--- /dev/null
+++ b/nodejs/packagescanner_test.go
@@ -0,0 +1,100 @@
+package nodejs_test
+
+import (
+	"context"
+	"os"
+	"testing"
+
+	"github.com/quay/claircore"
+	"github.com/quay/claircore/nodejs"
+
+	"github.com/google/go-cmp/cmp"
+	"github.com/quay/zlog"
+)
+
+// TestScanLocal scans a checked-in layer tarball and spot-checks the result.
+func TestScanLocal(t *testing.T) {
+	ctx, done := context.WithCancel(context.Background())
+	defer done()
+
+	table := []struct {
+		name      string
+		want      []*claircore.Package
+		layerPath string
+	}{
+		{
+			name: "sample NodeJS app",
+			want: []*claircore.Package{
+				{
+					Name:    "accepts",
+					Version: "1.3.8",
+				},
+				{
+					Name:    "array-flatten",
+					Version: "1.1.1",
+				},
+				{
+					Name:    "express",
+					Version: "4.18.2",
+				},
+				{
+					Name:    "ipaddr.js",
+					Version: "1.9.1",
+				},
+			},
+			layerPath: "testdata/sample-nodejs-app.tar",
+		},
+	}
+	for _, tt := range table {
+		t.Run(tt.name, func(t *testing.T) {
+			file, err := os.Open(tt.layerPath)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer file.Close()
+
+			ctx := zlog.Test(ctx, t)
+			scanner := &nodejs.Scanner{}
+			var l claircore.Layer
+			err = l.Init(ctx, &claircore.LayerDescription{
+				Digest:    "sha256:1e1bb6832aca0391eefafc58fd9a6b77d728eab3195c536562a86f15b06aed92",
+				MediaType: `application/vnd.oci.image.layer.v1.tar`,
+			}, file)
+			if err != nil {
+				t.Fatal(err)
+			}
+			defer l.Close()
+
+			got, err := scanner.Scan(ctx, &l)
+			if err != nil {
+				t.Error(err)
+			}
+			if n := len(got); n != 56 {
+				t.Errorf("got %d packages, want %d", n, 56)
+			}
+
+			gotMap := make(map[string]*claircore.Package, len(got))
+			for _, g := range got {
+				gotMap[g.Name] = g
+			}
+
+			// Test a select few packages.
+			for _, w := range tt.want {
+				g, exists := gotMap[w.Name]
+				if !exists {
+					t.Errorf("%s was not found", w.Name)
+					continue
+				}
+
+				// Only compare name and version at this time.
+				p := &claircore.Package{
+					Name:    g.Name,
+					Version: g.Version,
+				}
+				if !cmp.Equal(p, w) {
+					t.Error(cmp.Diff(p, w))
+				}
+			}
+		})
+	}
+}
diff --git a/nodejs/testdata/README.md b/nodejs/testdata/README.md
new file mode 100644
index 000000000..1cebb85a3
--- /dev/null
+++ b/nodejs/testdata/README.md
@@ -0,0 +1,39 @@
+# testdata
+
+## sample-nodejs-app.tar
+
+This layer is derived from a sample NodeJS 16 image
+from https://nodejs.org/en/docs/guides/nodejs-docker-webapp/.
+
+It was created by the following Dockerfile:
+
+```
+FROM node:16
+
+# Install app dependencies
+# A wildcard is used to ensure both package.json AND package-lock.json are copied
+# where available (npm@5+)
+COPY package*.json ./
+
+RUN npm install
+```
+
+The related package.json is as follows:
+
+```
+{
+  "name": "docker_web_app",
+  "version": "1.0.0",
+  "description": "Node.js on Docker",
+  "author": "First Last <first.last@example.com>",
+  "main": "server.js",
+  "scripts": {
+    "start": "node server.js"
+  },
+  "dependencies": {
+    "express": "^4.16.1"
+  }
+}
+```
+
+The layer tar was extracted from the image (top layer).
diff --git a/nodejs/testdata/sample-nodejs-app.tar b/nodejs/testdata/sample-nodejs-app.tar
new file mode 100644
index 000000000..999c04b5c
Binary files /dev/null and b/nodejs/testdata/sample-nodejs-app.tar differ