From 623f5386851a623060d7a8bf032fce8e3916825a Mon Sep 17 00:00:00 2001 From: Arjun Sreedharan Date: Mon, 9 Sep 2024 11:10:46 +0000 Subject: [PATCH] SyftCLIScanner: support SBOM generation with syft CLI Packit currently supports SBOM generation with syft tooling by utilizing syft's go library. This has caused packit maintainers a significant maintenance burden. This commit adds a mechanism for buildpack authors to utilize the syft CLI instead to generate SBOM. The intention here is that with widespread adoption of this, we can phase out the codebase that uses the syft go library and thereby relieve the maintainers of this pain. Until recently, syft did not allow consumers to specify the exact schema version of an SBOM mediatype they want generated (the tooling currently supports passing a version for CycloneDX and SPDX - github.com/anchore/syft/issues/846#issuecomment-1908676454). So packit was forced to vendor-in (copy) large chunks of upstream syft go code into packit in order to pin SBOM mediatype versions to versions that most consumers wanted to use. Every time a new version of Syft came out, maintainers had to painfully update the vendored-in code to work with upstream syft components (e.g. github.com/paketo-buildpacks/packit/pull/491). Furthermore, it is advantageous to use the syft CLI instead of the syft go library for multiple reasons. With the CLI, we can delegate the entire SBOM generation mechanism easily to syft. It should help buildpacks avoid any CVEs that they are exposed to via the syft go libraries. The CLI tool is well documented and widely used in the community, and it seems like the syft project is developed with a CLI-first approach. The caveat here is that buildpack authors who use this method should include the Paketo Syft buildpack in their buildplan to have access to the CLI during the build phase. 
Example usage: \# detect \# unless BP_DISABLE_BOM is true requirements = append(requirements, packit.BuildPlanRequirement{ Name: "syft", Metadata: map[string]interface{}{ "build": true, }, }) \# build syftCLIScanner := sbomgen.NewSyftCLIScanner( pexec.NewExecutable("syft"), scribe.NewEmitter(os.Stdout), ) \# To scan a layer after installing a dependency _ = syftCLIScanner.GenerateSBOM(myLayer.Path, context.Layers.Path, myLayer.Name, context.BuildpackInfo.SBOMFormats..., ) \# OR to scan the workspace dir after running a process _ = syftCLIScanner.GenerateSBOM(context.WorkingDir, context.Layers.Path, myLayer.Name, context.BuildpackInfo.SBOMFormats..., ) - A new package sbomgen is created instead of adding the functionality to the existing sbom package because it helps buildpacks remove the pinned "anchore/syft" lib from their go.mod, which was flagged by CVE scanners. - I have not implemented the prettification of the SBOM that the codepath using the syft go lib implements. It seems to add bloat to the app image and is not supported via the CLI. Consumers of the SBOM can easily prettify the SBOM JSONs. - In the codepath that uses the syft go lib, license information is manually injected from buildpack.toml data into the SBOM. This is not available with the SyftCLIScanner. I couldn't find any reasoning for why this was done in the first place. - I have intentionally not reused code from methods that are entangled with the syft go library, so that that codebase can be phased out easily in the near future. 
--- sbomgen/fakes/executable.go | 32 +++ sbomgen/formats.go | 51 +++++ sbomgen/formats_test.go | 45 ++++ sbomgen/init_test.go | 42 ++++ sbomgen/syft_cli_scanner.go | 238 +++++++++++++++++++++ sbomgen/syft_cli_scanner_test.go | 345 +++++++++++++++++++++++++++++++ 6 files changed, 753 insertions(+) create mode 100644 sbomgen/fakes/executable.go create mode 100644 sbomgen/formats.go create mode 100644 sbomgen/formats_test.go create mode 100644 sbomgen/init_test.go create mode 100644 sbomgen/syft_cli_scanner.go create mode 100644 sbomgen/syft_cli_scanner_test.go diff --git a/sbomgen/fakes/executable.go b/sbomgen/fakes/executable.go new file mode 100644 index 00000000..484f7e89 --- /dev/null +++ b/sbomgen/fakes/executable.go @@ -0,0 +1,32 @@ +package fakes + +import ( + "sync" + + "github.com/paketo-buildpacks/packit/v2/pexec" +) + +type Executable struct { + ExecuteCall struct { + mutex sync.Mutex + CallCount int + Receives struct { + Execution pexec.Execution + } + Returns struct { + Err error + } + Stub func(pexec.Execution) error + } +} + +func (f *Executable) Execute(param1 pexec.Execution) error { + f.ExecuteCall.mutex.Lock() + defer f.ExecuteCall.mutex.Unlock() + f.ExecuteCall.CallCount++ + f.ExecuteCall.Receives.Execution = param1 + if f.ExecuteCall.Stub != nil { + return f.ExecuteCall.Stub(param1) + } + return f.ExecuteCall.Returns.Err +} diff --git a/sbomgen/formats.go b/sbomgen/formats.go new file mode 100644 index 00000000..6c645e16 --- /dev/null +++ b/sbomgen/formats.go @@ -0,0 +1,51 @@ +package sbomgen + +import ( + "fmt" + "mime" + "strings" +) + +const ( + CycloneDXFormat = "application/vnd.cyclonedx+json" + SPDXFormat = "application/spdx+json" + SyftFormat = "application/vnd.syft+json" +) + +// Format is the type declaration for the supported SBoM output formats. +type Format string + +// Extension outputs the expected file extension for a given Format. +// packit allows CycloneDX and SPDX mediatypes to have an optional +// version suffix. e.g. 
"application/vnd.cyclonedx+json;version=1.4" +// The version suffix is not allowed for the syft mediatype as the +// syft tooling does not support providing a version for this mediatype. +func (f Format) Extension() (string, error) { + switch { + case strings.HasPrefix(string(f), CycloneDXFormat): + return "cdx.json", nil + case strings.HasPrefix(string(f), SPDXFormat): + return "spdx.json", nil + case f == SyftFormat: + return "syft.json", nil + default: + return "", fmt.Errorf("Unknown mediatype %s", f) + } +} + +// Extracts optional version. This usually derives from the "sbom-formats" +// field used by packit-based buildpacks (@packit.SBOMFormats). e.g. +// "application/vnd.cyclonedx+json;version=1.4" -> "1.4" See +// github.com/paketo-buildpacks/packit/issues/302 +func (f Format) VersionParam() (string, error) { + _, params, err := mime.ParseMediaType(string(f)) + if err != nil { + return "", fmt.Errorf("failed to parse SBOM mediatype. Expected [;version=], Got %s: %w", f, err) + } + + version, ok := params["version"] + if !ok { + return "", nil + } + return version, nil +} diff --git a/sbomgen/formats_test.go b/sbomgen/formats_test.go new file mode 100644 index 00000000..44a674c7 --- /dev/null +++ b/sbomgen/formats_test.go @@ -0,0 +1,45 @@ +package sbomgen_test + +import ( + "testing" + + "github.com/paketo-buildpacks/packit/v2/sbomgen" + "github.com/sclevine/spec" + + . 
"github.com/onsi/gomega" +) + +func testFormats(t *testing.T, context spec.G, it spec.S) { + var Expect = NewWithT(t).Expect + var f sbomgen.Format + + context("Formats", func() { + context("no version param", func() { + it("gets the right mediatype extension and version", func() { + f = sbomgen.CycloneDXFormat + ext, err := f.Extension() + Expect(err).NotTo(HaveOccurred()) + Expect(ext).To(Equal("cdx.json")) + Expect(f.VersionParam()).To(Equal("")) + }) + }) + + context("with version param", func() { + it("gets the right mediatype extension and version", func() { + f = sbomgen.SPDXFormat + ";version=9.8.7" + ext, err := f.Extension() + Expect(err).NotTo(HaveOccurred()) + Expect(ext).To(Equal("spdx.json")) + Expect(f.VersionParam()).To(Equal("9.8.7")) + }) + context("Syft mediatype with version returns empty", func() { + it("returns error", func() { + f = sbomgen.SyftFormat + ";version=9.8.7" + ext, err := f.Extension() + Expect(err).To(MatchError(ContainSubstring("Unknown mediatype application/vnd.syft+json;version=9.8.7"))) + Expect(ext).To(Equal("")) + }) + }) + }) + }) +} diff --git a/sbomgen/init_test.go b/sbomgen/init_test.go new file mode 100644 index 00000000..422b3374 --- /dev/null +++ b/sbomgen/init_test.go @@ -0,0 +1,42 @@ +package sbomgen_test + +import ( + "testing" + "time" + + "github.com/onsi/gomega/format" + "github.com/sclevine/spec" + "github.com/sclevine/spec/report" +) + +func TestUnitSBOM(t *testing.T) { + format.MaxLength = 0 + + suite := spec.New("sbomgen", spec.Report(report.Terminal{})) + suite("Formats", testFormats) + suite("SyftCLIScanner", testSyftCLIScanner) + suite.Run(t) +} + +type externalRef struct { + Category string `json:"referenceCategory"` + Locator string `json:"referenceLocator"` + Type string `json:"referenceType"` +} + +type pkg struct { + ExternalRefs []externalRef `json:"externalRefs"` + LicenseConcluded string `json:"licenseConcluded"` + LicenseDeclared string `json:"licenseDeclared"` + Name string `json:"name"` + 
Version string `json:"versionInfo"` +} + +type spdxOutput struct { + Packages []pkg `json:"packages"` + SPDXVersion string `json:"spdxVersion"` + DocumentNamespace string `json:"documentNamespace"` + CreationInfo struct { + Created time.Time `json:"created"` + } `json:"creationInfo"` +} diff --git a/sbomgen/syft_cli_scanner.go b/sbomgen/syft_cli_scanner.go new file mode 100644 index 00000000..f8efcea7 --- /dev/null +++ b/sbomgen/syft_cli_scanner.go @@ -0,0 +1,238 @@ +package sbomgen + +import ( + "encoding/json" + "fmt" + "net/url" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/google/uuid" + "github.com/paketo-buildpacks/packit/v2/pexec" + "github.com/paketo-buildpacks/packit/v2/scribe" +) + +//go:generate faux --interface Executable --output fakes/executable.go +type Executable interface { + Execute(pexec.Execution) (err error) +} + +// SyftCLIScanner implements scanning a dir using the `syft` CLI +// to generate SBOM, process it, and write it to a location that complies with +// the buildpacks spec. Supports CycloneDX, SPDX and Syft mediatypes, with an +// optional version param for CycloneDX and SPDX. +// +// Example Usage: +// +// syftCLIScanner := sbomgen.NewSyftCLIScanner( +// pexec.NewExecutable("syft"), +// scribe.NewEmitter(os.Stdout), +// ) +type SyftCLIScanner struct { + syftCLI Executable + logger scribe.Emitter +} + +func NewSyftCLIScanner(syftCLI Executable, logger scribe.Emitter) SyftCLIScanner { + return SyftCLIScanner{ + syftCLI: syftCLI, + logger: logger, + } +} + +// GenerateSBOM takes a path to a directory to scan and a list of SBOM mediatypes +// (with an optional version for CycloneDX and SPDX), and invokes the syft CLI +// scan command. The CLI is instructed to write the SBOM to +// {layersPath}/{layerName}.sbom.{extension} as defined by the buildpack spec. Additionally, +// CycloneDX & SPDX outputs are modified to make the output reproducible +// (Paketo RFCs 38 & 49). 
+func (s SyftCLIScanner) GenerateSBOM(scanDir, layersPath, layerName string, mediaTypes ...string) error { + sbomWritePaths := make(map[string]string) + args := []string{"scan", "--quiet"} + + s.logger.Debug.Process("Generating SBOM") + s.logger.Debug.Subprocess("Generating syft CLI args from provided mediatypes %s", mediaTypes) + for _, mediatype := range mediaTypes { + syftOutputFormat, err := s.specMediatypeToSyftOutputFormat(mediatype) + if err != nil { + return fmt.Errorf("failed to convert mediatype %s to syft output format: %w", mediatype, err) + } + + extension, err := Format(mediatype).Extension() + if err != nil { + return err + } + + // Layer SBOM write location during build is {layersPath}/{layerName}.sbom.{extension} (CNB spec) + sbomWritePaths[mediatype] = filepath.Join(layersPath, fmt.Sprintf("%s.sbom.%s", layerName, extension)) + args = append(args, "--output", fmt.Sprintf("%s=%s", syftOutputFormat, sbomWritePaths[mediatype])) + } + + args = append(args, fmt.Sprintf("dir:%s", scanDir)) + + s.logger.Debug.Subprocess("Executing syft CLI with args %v", args) + if err := s.syftCLI.Execute(pexec.Execution{ + Args: args, + Dir: scanDir, + Stdout: s.logger.ActionWriter, + Stderr: s.logger.ActionWriter, + }); err != nil { + return fmt.Errorf("failed to execute syft cli with args '%s': %w.\nYou might be missing a buildpack that provides the syft CLI", args, err) + } + + // Make SBOM outputs reproducible + for _, mediatype := range mediaTypes { + if strings.HasPrefix(mediatype, CycloneDXFormat) { + s.logger.Debug.Subprocess("Processing syft CLI CycloneDX SBOM output to make it reproducible") + err := s.makeCycloneDXReproducible(sbomWritePaths[mediatype]) + if err != nil { + return fmt.Errorf("failed to make CycloneDX SBOM reproducible: %w", err) + } + } else if strings.HasPrefix(mediatype, SPDXFormat) { + s.logger.Debug.Subprocess("Processing syft CLI SPDX SBOM output to make it reproducible") + err := s.makeSPDXReproducible(sbomWritePaths[mediatype]) + if err != nil { + return 
fmt.Errorf("failed to make SPDX SBOM reproducible: %w", err) + } + } + } + + s.logger.Debug.Break() + return nil +} + +// specMediatypeToSyftOutputFormat takes an SBOM mediatype name as defined by the buildpack spec, +// (with an optional version param for CycloneDX and SPDX, e.g. +// "application/vnd.cyclonedx+json;version=1.4") and returns the output format +// understood by syft tooling (e.g. "cyclonedx-json@1.4"). +// Refer to github.com/anchore/syft/blob/v1.11.1/cmd/syft/internal/options/writer.go#L86 +func (s SyftCLIScanner) specMediatypeToSyftOutputFormat(mediatype string) (string, error) { + optionalVersionParam, err := Format(mediatype).VersionParam() + if err != nil { + return "", err + } + if optionalVersionParam != "" { + optionalVersionParam = "@" + optionalVersionParam + } + + switch { + case strings.HasPrefix(mediatype, CycloneDXFormat): + return "cyclonedx-json" + optionalVersionParam, nil + case strings.HasPrefix(mediatype, SPDXFormat): + return "spdx-json" + optionalVersionParam, nil + case strings.HasPrefix(mediatype, SyftFormat): + // The syft tool does not support providing a version for the syft mediatype. + if optionalVersionParam != "" { + return "", fmt.Errorf("The syft mediatype does not allow providing a ;version= param. Got: %s", mediatype) + } + return "syft-json", nil + default: + return "", fmt.Errorf("mediatype %s matched none of the known mediatypes. Valid values are %s, with an optional version param for CycloneDX and SPDX", mediatype, []string{CycloneDXFormat, SPDXFormat, SyftFormat}) + } +} + +// Makes CycloneDX SBOM more reproducible. +// Remove fields serialNumber and metadata.timestamp. 
+// See https://github.com/paketo-buildpacks/rfcs/blob/main/text/0038-cdx-syft-sbom.md#amendment-sbom-reproducibility +func (s SyftCLIScanner) makeCycloneDXReproducible(path string) error { + in, err := os.Open(path) + if err != nil { + return fmt.Errorf("unable to read CycloneDX JSON file %s:%w", path, err) + } + defer in.Close() + + input := map[string]interface{}{} + if err := json.NewDecoder(in).Decode(&input); err != nil { + return fmt.Errorf("unable to decode CycloneDX JSON %s: %w", path, err) + } + + delete(input, "serialNumber") + + if md, exists := input["metadata"]; exists { + if metadata, ok := md.(map[string]interface{}); ok { + delete(metadata, "timestamp") + } + } + + out, err := os.Create(path) + if err != nil { + return fmt.Errorf("unable to open CycloneDX JSON for writing %s: %w", path, err) + } + defer out.Close() + + if err := json.NewEncoder(out).Encode(input); err != nil { + return fmt.Errorf("unable to encode CycloneDX: %w", err) + } + + return nil +} + +// Makes SPDX SBOM more reproducible. +// Ensure documentNamespace and creationInfo.created have reproducible values. +// The method respects $SOURCE_DATE_EPOCH for created timestamp if set. 
+// See github.com/paketo-buildpacks/rfcs/blob/main/text/0049-reproducible-spdx.md +func (s SyftCLIScanner) makeSPDXReproducible(path string) error { + in, err := os.Open(path) + if err != nil { + return fmt.Errorf("unable to read SPDX JSON file %s:%w", path, err) + } + defer in.Close() + + input := map[string]interface{}{} + if err := json.NewDecoder(in).Decode(&input); err != nil { + return fmt.Errorf("unable to decode SPDX JSON %s: %w", path, err) + } + + // Makes the creationInfo reproducible so a hash can be taken for the + // documentNamespace + if creationInfo, ok := input["creationInfo"].(map[string]interface{}); ok { + creationInfo["created"] = time.Time{} // This is the zero-valued time + + sourceDateEpoch := os.Getenv("SOURCE_DATE_EPOCH") + if sourceDateEpoch != "" { + sde, err := strconv.ParseInt(sourceDateEpoch, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse SOURCE_DATE_EPOCH: %w", err) + } + creationInfo["created"] = time.Unix(sde, 0).UTC() + } + input["creationInfo"] = creationInfo + } + + if namespace, ok := input["documentNamespace"].(string); ok { + delete(input, "documentNamespace") + + data, err := json.Marshal(input) + if err != nil { + return fmt.Errorf("failed to checksum SPDX document: %w", err) + } + + uri, err := url.Parse(namespace) + if err != nil { + return fmt.Errorf("failed to parse SPDX documentNamespace url: %w", err) + } + + uri.Host = "paketo.io" + uri.Path = strings.Replace(uri.Path, "syft", "packit", 1) + oldBase := filepath.Base(uri.Path) + source, _, _ := strings.Cut(oldBase, "-") + newBase := fmt.Sprintf("%s-%s", source, uuid.NewSHA1(uuid.NameSpaceURL, data)) + uri.Path = strings.Replace(uri.Path, oldBase, newBase, 1) + + input["documentNamespace"] = uri.String() + } + + out, err := os.Create(path) + if err != nil { + return fmt.Errorf("unable to open SPDX JSON for writing %s: %w", path, err) + } + defer out.Close() + + if err := json.NewEncoder(out).Encode(input); err != nil { + return fmt.Errorf("unable to 
encode SPDX: %w", err) + } + return nil +} diff --git a/sbomgen/syft_cli_scanner_test.go b/sbomgen/syft_cli_scanner_test.go new file mode 100644 index 00000000..38fb298f --- /dev/null +++ b/sbomgen/syft_cli_scanner_test.go @@ -0,0 +1,345 @@ +package sbomgen_test + +import ( + "bytes" + "encoding/json" + "fmt" + "os" + "path/filepath" + "reflect" + "strings" + "testing" + "time" + + "github.com/paketo-buildpacks/packit/v2/pexec" + "github.com/paketo-buildpacks/packit/v2/sbomgen" + "github.com/paketo-buildpacks/packit/v2/sbomgen/fakes" + "github.com/paketo-buildpacks/packit/v2/scribe" + "github.com/sclevine/spec" + + . "github.com/onsi/gomega" +) + +func testSyftCLIScanner(t *testing.T, context spec.G, it spec.S) { + var Expect = NewWithT(t).Expect + + context("NewSBOMCLIScanner", func() { + var ( + syftCLIScanner sbomgen.SyftCLIScanner + logsBuffer *bytes.Buffer + layersDir string + err error + + executions []pexec.Execution + executable *fakes.Executable + ) + + it.Before(func() { + logsBuffer = bytes.NewBuffer(nil) + executable = &fakes.Executable{} + + layersDir, err = os.MkdirTemp("", "layers") + Expect(err).NotTo(HaveOccurred()) + + executable.ExecuteCall.Stub = func(execution pexec.Execution) error { + executions = append(executions, execution) + if strings.Contains(strings.Join(execution.Args, " "), "cyclonedx-json") { + Expect(os.WriteFile(filepath.Join(layersDir, "some-layer-name.sbom.cdx.json"), []byte(`{ + "bomFormat": "CycloneDX", + "specVersion": "1.4", + "serialNumber": "urn:uuid:5d2fcb74-b20f-4091-b3ce-b29201f136eb", + "version": 1, + "metadata": { + "timestamp": "2024-09-09T17:28:12Z", + "tools": [ + { + "vendor": "anchore", + "name": "syft", + "version": "1.11.1" + } + ], + "component": { + "bom-ref": "5b6e90752b6334f9", + "type": "file", + "name": "/layers/paketo-buildpacks_node-engine/node" + } + } +}`), 0600)).To(Succeed()) + } + + if strings.Contains(strings.Join(execution.Args, " "), "spdx-json") { + Expect(os.WriteFile(filepath.Join(layersDir, 
"some-layer-name.sbom.spdx.json"), []byte(`{ + "spdxVersion": "SPDX-2.3", + "name": "/workspace", + "documentNamespace": "https://anchore.com/syft/dir/workspace-2188c148-ec69-4e9c-a6c5-e24f2d738ba2", + "creationInfo": { + "licenseListVersion": "3.23", + "created": "2024-08-07T17:28:12Z" + }, + "packages": [ + { + "name": "apackage", + "SPDXID": "SPDXRef-Package-npm-apackage-4bc84cbb6d76f2fa", + "versionInfo": "9.8.7", + "downloadLocation": "https://registry.npmjs.org/apackage/-/apackage-9.8.7.tgz" + } + ], + "files": [ + { + "fileName": "/package-lock.json", + "SPDXID": "SPDXRef-File-package-lock.json-fd71c2238fc07657" + } + ], + "relationships": [ + { + "relationshipType": "OTHER", + "comment": "evident-by: indicates the package's existence is evident by the given file" + } + ] +}`), 0600)).To(Succeed()) + } + return nil + } + + syftCLIScanner = sbomgen.NewSyftCLIScanner( + executable, + scribe.NewEmitter(logsBuffer), + ) + }) + + it.After(func() { + Expect(os.RemoveAll(layersDir)).To(Succeed()) + }) + + context("GenerateSBOM", func() { + context("syft CLI execution", func() { + context("single mediatype without a version", func() { + it("runs the cli commands to scan and generate SBOM", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", sbomgen.CycloneDXFormat) + Expect(err).NotTo(HaveOccurred()) + + Expect(executions).To(HaveLen(1)) + Expect(executions[0].Args).To(Equal([]string{ + "scan", + "--quiet", + "--output", fmt.Sprintf("cyclonedx-json=%s/some-layer-name.sbom.cdx.json", layersDir), + "dir:some-dir", + })) + }) + }) + + context("multiple mediatypes without a version", func() { + it("runs the cli commands to scan and generate SBOM", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", + sbomgen.CycloneDXFormat, sbomgen.SPDXFormat, sbomgen.SyftFormat) + Expect(err).NotTo(HaveOccurred()) + + Expect(executions).To(HaveLen(1)) + Expect(executions[0].Args).To(Equal([]string{ + "scan", + 
"--quiet", + "--output", fmt.Sprintf("cyclonedx-json=%s/some-layer-name.sbom.cdx.json", layersDir), + "--output", fmt.Sprintf("spdx-json=%s/some-layer-name.sbom.spdx.json", layersDir), + "--output", fmt.Sprintf("syft-json=%s/some-layer-name.sbom.syft.json", layersDir), + "dir:some-dir", + })) + }) + }) + + context("multiple mediatypes with and without version", func() { + it("runs the cli commands to scan and generate SBOM", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", + sbomgen.CycloneDXFormat+";version=1.2.3", sbomgen.SPDXFormat, sbomgen.SyftFormat) + Expect(err).NotTo(HaveOccurred()) + + Expect(executions).To(HaveLen(1)) + Expect(executions[0].Args).To(Equal([]string{ + "scan", + "--quiet", + "--output", fmt.Sprintf("cyclonedx-json@1.2.3=%s/some-layer-name.sbom.cdx.json", layersDir), + "--output", fmt.Sprintf("spdx-json=%s/some-layer-name.sbom.spdx.json", layersDir), + "--output", fmt.Sprintf("syft-json=%s/some-layer-name.sbom.syft.json", layersDir), + "dir:some-dir", + })) + }) + }) + }) + + context("making CLI CycloneDX output reproducible", func() { + it("removes non-reproducible fields from CycloneDX SBOM", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", sbomgen.CycloneDXFormat) + Expect(err).NotTo(HaveOccurred()) + + generatedSBOM, err := os.ReadFile(filepath.Join(layersDir, "some-layer-name.sbom.cdx.json")) + Expect(err).NotTo(HaveOccurred()) + + // This is the stub-generated SBOM with non-repro fields removed + expectedSBOM := `{ + "bomFormat": "CycloneDX", + "specVersion": "1.4", + "version": 1, + "metadata": { + "tools": [ + { + "vendor": "anchore", + "name": "syft", + "version": "1.11.1" + } + ], + "component": { + "bom-ref": "5b6e90752b6334f9", + "type": "file", + "name": "/layers/paketo-buildpacks_node-engine/node" + } + } +}` + var obj1, obj2 interface{} + err = json.Unmarshal([]byte(generatedSBOM), &obj1) + Expect(err).NotTo(HaveOccurred()) + err = 
json.Unmarshal([]byte(expectedSBOM), &obj2) + Expect(err).NotTo(HaveOccurred()) + Expect(reflect.DeepEqual(obj1, obj2)).To(BeTrue()) + }) + }) + + context("making CLI SPDX output reproducible", func() { + context("without setting $SOURCE_DATE_EPOCH", func() { + it("modifies non-reproducible fields from SPDX SBOM", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", sbomgen.SPDXFormat) + Expect(err).NotTo(HaveOccurred()) + + generatedSBOM, err := os.ReadFile(filepath.Join(layersDir, "some-layer-name.sbom.spdx.json")) + Expect(err).NotTo(HaveOccurred()) + + var generatedSBOMObj spdxOutput + err = json.Unmarshal([]byte(generatedSBOM), &generatedSBOMObj) + Expect(err).NotTo(HaveOccurred()) + + // Ensure documentNamespace and creationInfo.created have reproducible values + Expect(generatedSBOMObj.DocumentNamespace).To(Equal("https://paketo.io/packit/dir/workspace-b45eebde-57b8-5069-8df8-bcf8bc91810f")) + Expect(generatedSBOMObj.CreationInfo.Created).To(BeZero()) + + // Make sure others are unchanged + Expect(generatedSBOMObj.SPDXVersion).To(Equal("SPDX-2.3")) + Expect(generatedSBOMObj.Packages).To(HaveLen(1)) + Expect(generatedSBOMObj.Packages[0].Name).To(Equal("apackage")) + Expect(generatedSBOMObj.Packages[0].Version).To(Equal("9.8.7")) + }) + }) + + context("setting $SOURCE_DATE_EPOCH", func() { + var original string + + it.Before(func() { + original = os.Getenv("SOURCE_DATE_EPOCH") + Expect(os.Setenv("SOURCE_DATE_EPOCH", "1659551872")).To(Succeed()) + }) + + it.After(func() { + Expect(os.Setenv("SOURCE_DATE_EPOCH", original)).To(Succeed()) + }) + + it("modifies non-reproducible fields from SPDX SBOM", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", sbomgen.SPDXFormat) + Expect(err).NotTo(HaveOccurred()) + + generatedSBOM, err := os.ReadFile(filepath.Join(layersDir, "some-layer-name.sbom.spdx.json")) + Expect(err).NotTo(HaveOccurred()) + + var generatedSBOMObj spdxOutput + err = 
json.Unmarshal([]byte(generatedSBOM), &generatedSBOMObj) + Expect(err).NotTo(HaveOccurred()) + + // Ensure documentNamespace and creationInfo.created have reproducible values + Expect(generatedSBOMObj.DocumentNamespace).To(Equal("https://paketo.io/packit/dir/workspace-28ef3e20-b1ec-522e-9bd5-0fcf2b7ea5c2")) + Expect(generatedSBOMObj.CreationInfo.Created).To(Equal(time.Unix(1659551872, 0).UTC())) + }) + }) + }) + + context("failure cases", func() { + context("invalid mediatype name", func() { + it("shows an invalid type error", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", "whatever-mediatype") + Expect(err).To(MatchError(ContainSubstring("mediatype whatever-mediatype matched none of the known mediatypes. Valid values are [application/vnd.cyclonedx+json application/spdx+json application/vnd.syft+json], with an optional version param for CycloneDX and SPDX"))) + }) + }) + + context("invalid mediatype version format", func() { + it("shows an invalid mediatype version format error", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", "application/vnd.cyclonedx+json;;foo") + Expect(err).To(MatchError(ContainSubstring("Expected [;version=], Got application/vnd.cyclonedx+json;;foo"))) + }) + }) + + context("syft mediatype contains a version specifier", func() { + it("shows an error", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", + sbomgen.CycloneDXFormat, sbomgen.SPDXFormat, sbomgen.SyftFormat+";version=1.2.3") + Expect(err).To(MatchError(ContainSubstring("The syft mediatype does not allow providing a ;version= param"))) + }) + }) + + context("syft CLI execution fails", func() { + it.Before(func() { + executable.ExecuteCall.Stub = func(execution pexec.Execution) error { + fmt.Fprintln(execution.Stdout, "cli error stdout") + fmt.Fprintln(execution.Stderr, "cli error stderr") + return fmt.Errorf("cli command failed") + } + }) + it("returns an error & 
writes to logs", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", layersDir, "some-layer-name", sbomgen.CycloneDXFormat+";version=1.2.3", sbomgen.SPDXFormat, sbomgen.SyftFormat) + Expect(err).To(MatchError(ContainSubstring( + fmt.Sprintf("failed to execute syft cli with args '[scan --quiet --output cyclonedx-json@1.2.3=%s/some-layer-name.sbom.cdx.json --output spdx-json=%s/some-layer-name.sbom.spdx.json --output syft-json=%s/some-layer-name.sbom.syft.json dir:some-dir]'", layersDir, layersDir, layersDir)))) + Expect(err).To(MatchError(ContainSubstring("cli command failed"))) + Expect(err).To(MatchError(ContainSubstring("You might be missing a buildpack that provides the syft CLI"))) + + Expect(logsBuffer.String()).To(ContainSubstring("cli error stdout")) + Expect(logsBuffer.String()).To(ContainSubstring("cli error stderr")) + }) + }) + + context("making CycloneDX output reproducible fails", func() { + var tmpLayersDir string + var err error + + it.Before(func() { + tmpLayersDir, err = os.MkdirTemp("", "layers") + Expect(err).NotTo(HaveOccurred()) + Expect(os.WriteFile(filepath.Join(tmpLayersDir, "some-layer-name.sbom.cdx.json"), []byte(`invalid-sbom`), 0600)).To(Succeed()) + }) + + it.After(func() { + Expect(os.RemoveAll(tmpLayersDir)).To(Succeed()) + }) + + it("returns helpful error message", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", tmpLayersDir, "some-layer-name", sbomgen.CycloneDXFormat) + Expect(err).To(MatchError(ContainSubstring("failed to make CycloneDX SBOM reproducible: unable to decode CycloneDX JSON"))) + }) + }) + + context("making SPDX output reproducible fails", func() { + var tmpLayersDir string + var err error + + it.Before(func() { + tmpLayersDir, err = os.MkdirTemp("", "layers") + Expect(err).NotTo(HaveOccurred()) + Expect(os.WriteFile(filepath.Join(tmpLayersDir, "some-layer-name.sbom.spdx.json"), []byte(`invalid-sbom`), 0600)).To(Succeed()) + }) + + it.After(func() { + Expect(os.RemoveAll(tmpLayersDir)).To(Succeed()) + 
}) + + it("returns helpful error message", func() { + err := syftCLIScanner.GenerateSBOM("some-dir", tmpLayersDir, "some-layer-name", sbomgen.SPDXFormat) + Expect(err).To(MatchError(ContainSubstring("failed to make SPDX SBOM reproducible: unable to decode SPDX JSON"))) + }) + }) + }) + }) + }) +}