Skip to content

Commit

Permalink
Merge pull request #366 from mindedsecurity:extractor_haskell_cabal
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 716629087
  • Loading branch information
copybara-github committed Jan 17, 2025
2 parents be69ecf + a1c2a27 commit 33a0f08
Show file tree
Hide file tree
Showing 8 changed files with 487 additions and 2 deletions.
1 change: 1 addition & 0 deletions docs/supported_inventory_types.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ SCALIBR supports extracting software package information from a variety of OS an
* go.mod (OSV)
* Haskell
* stack.yaml.lock
* cabal.project.freeze
* Java
* Java archives
* Lockfiles: pom.xml, gradle.lockfile, verification-metadata.xml
Expand Down
192 changes: 192 additions & 0 deletions extractor/filesystem/language/haskell/cabal/cabal.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package cabal extracts cabal.project.freeze files from haskell projects.
package cabal

import (
"bufio"
"context"
"fmt"
"path/filepath"
"regexp"
"strings"

"github.com/google/osv-scalibr/extractor"
"github.com/google/osv-scalibr/extractor/filesystem"
"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
"github.com/google/osv-scalibr/plugin"
"github.com/google/osv-scalibr/purl"
"github.com/google/osv-scalibr/stats"
)

const (
// Name is the unique name of this extractor.
Name = "haskell/cabal"

// defaultMaxFileSizeBytes is the default upper bound on the size of a
// cabal.project.freeze file this extractor accepts (30 MiB). It is the value
// DefaultConfig assigns to Config.MaxFileSizeBytes; FileRequired rejects
// files larger than the configured limit.
defaultMaxFileSizeBytes = 30 * units.MiB
)

// Config is the configuration for the Extractor.
type Config struct {
// Stats is a stats collector for reporting metrics. It may be nil, in which
// case no metrics are reported.
Stats stats.Collector
// MaxFileSizeBytes is the maximum file size this extractor will process. If
// `FileRequired` gets a bigger file, it will return false. A value of zero or
// less disables the size check.
MaxFileSizeBytes int64
}

// DefaultConfig returns the default configuration for the extractor: no stats
// collector and the default maximum file size.
func DefaultConfig() Config {
	var cfg Config
	cfg.Stats = nil
	cfg.MaxFileSizeBytes = defaultMaxFileSizeBytes
	return cfg
}

// Extractor extracts cabal package info from cabal.project.freeze files.
type Extractor struct {
// stats receives FileRequired/Extract metrics; nil disables reporting.
stats stats.Collector
// maxFileSizeBytes is the size limit applied in FileRequired; values <= 0
// disable the check.
maxFileSizeBytes int64
}

// New returns a haskell cabal extractor configured from cfg.
func New(cfg Config) *Extractor {
	ext := new(Extractor)
	ext.stats = cfg.Stats
	ext.maxFileSizeBytes = cfg.MaxFileSizeBytes
	return ext
}

// Config returns the configuration of the extractor, rebuilt from the
// extractor's current stats collector and file size limit.
func (e Extractor) Config() Config {
	var cfg Config
	cfg.Stats = e.stats
	cfg.MaxFileSizeBytes = e.maxFileSizeBytes
	return cfg
}

// Name returns the unique name of the extractor.
func (e Extractor) Name() string {
	return Name
}

// Version returns the version of the extractor.
func (e Extractor) Version() int {
	const version = 0
	return version
}

// Requirements returns the capabilities this extractor needs; it returns an
// empty (zero-value) capability set.
func (e Extractor) Requirements() *plugin.Capabilities {
	caps := plugin.Capabilities{}
	return &caps
}

// FileRequired returns true if the specified file is named
// cabal.project.freeze, can be stat'ed, and does not exceed the configured
// maximum file size. The outcome of the size check is reported to the stats
// collector, if one is configured.
func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
	p := api.Path()
	if name := filepath.Base(p); name != "cabal.project.freeze" {
		return false
	}

	info, statErr := api.Stat()
	if statErr != nil {
		return false
	}

	// Decide the outcome first, then report it exactly once.
	size := info.Size()
	tooLarge := e.maxFileSizeBytes > 0 && size > e.maxFileSizeBytes

	result := stats.FileRequiredResultOK
	if tooLarge {
		result = stats.FileRequiredResultSizeLimitExceeded
	}
	e.reportFileRequired(p, size, result)

	return !tooLarge
}

// reportFileRequired forwards the result of a FileRequired decision to the
// stats collector. It is a no-op when no collector is configured.
func (e Extractor) reportFileRequired(path string, fileSizeBytes int64, result stats.FileRequiredResult) {
	if collector := e.stats; collector != nil {
		report := &stats.FileRequiredStats{
			Path:          path,
			Result:        result,
			FileSizeBytes: fileSizeBytes,
		}
		collector.AfterFileRequired(e.Name(), report)
	}
}

// Extract extracts packages from the cabal.project.freeze file and, when a
// stats collector is configured, reports the extraction result together with
// the file size (0 if input.Info is nil).
func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) {
	pkgs, extractErr := e.extractFromInput(ctx, input)

	// Nothing to report without a collector.
	if e.stats == nil {
		return pkgs, extractErr
	}

	var size int64
	if info := input.Info; info != nil {
		size = info.Size()
	}

	report := &stats.FileExtractedStats{
		Path:          input.Path,
		Result:        filesystem.ExtractorErrorToFileExtractedResult(extractErr),
		FileSizeBytes: size,
	}
	e.stats.AfterFileExtracted(e.Name(), report)

	return pkgs, extractErr
}

// versionConstraintRe matches a pinned-version constraint of the form
// "any.<name> ==<version>". Capture group 1 is the package name and group 2 is
// the version; because the version is matched with \S+, it may still carry a
// trailing comma that callers must strip.
var versionConstraintRe = regexp.MustCompile(`any\.(\S+) ==(\S+)`)

// extractFromInput reads the cabal.project.freeze file from input.Reader line
// by line and returns one inventory entry for every line containing a
// constraint of the form "any.<name> ==<version>".
//
// Blank lines and comment lines (those whose first non-space characters are
// "--") are skipped, as are lines that do not contain a pinned-version
// constraint.
//
// It returns the packages collected so far together with an error when the
// context is canceled or its deadline is exceeded, or when reading the input
// fails (including a line exceeding bufio.Scanner's token limit).
func (e Extractor) extractFromInput(ctx context.Context, input *filesystem.ScanInput) ([]*extractor.Inventory, error) {
	s := bufio.NewScanner(input.Reader)
	pkgs := []*extractor.Inventory{}

	for s.Scan() {
		// Return if canceled or exceeding deadline. Wrap with %w so callers can
		// detect context.Canceled / context.DeadlineExceeded via errors.Is.
		if err := ctx.Err(); err != nil {
			return pkgs, fmt.Errorf("%s halted at %q because of context error: %w", e.Name(), input.Path, err)
		}

		// Trim before testing for a comment so indented comments are skipped too.
		line := strings.TrimSpace(s.Text())
		if line == "" || strings.HasPrefix(line, "--") {
			continue
		}

		matches := versionConstraintRe.FindStringSubmatch(line)
		if len(matches) != 3 {
			continue
		}

		pkgs = append(pkgs, &extractor.Inventory{
			Name: matches[1],
			// Constraints are comma-separated, so the version capture may end in ",".
			Version:   strings.TrimSuffix(matches[2], ","),
			Locations: []string{input.Path},
		})
	}

	// Scanner errors only become visible once Scan has returned false, so the
	// check must happen after the loop; inside the loop Err() is always nil.
	if err := s.Err(); err != nil {
		return pkgs, fmt.Errorf("error while scanning cabal.project.freeze file from %v: %w", input.Path, err)
	}

	return pkgs, nil
}

// ToPURL converts an inventory created by this extractor into a PURL of type
// Haskell, carrying over the inventory's name and version.
func (e Extractor) ToPURL(i *extractor.Inventory) *purl.PackageURL {
	p := new(purl.PackageURL)
	p.Type = purl.TypeHaskell
	p.Name = i.Name
	p.Version = i.Version
	return p
}

// Ecosystem returns the OSV Ecosystem of the software extracted by this
// extractor. The result is the same for every inventory.
func (Extractor) Ecosystem(i *extractor.Inventory) string {
	const ecosystem = "Hackage"
	return ecosystem
}
Loading

0 comments on commit 33a0f08

Please sign in to comment.