diff --git a/cmd/csaf_aggregator/mirror.go b/cmd/csaf_aggregator/mirror.go index 1b197c42..0fd1de02 100644 --- a/cmd/csaf_aggregator/mirror.go +++ b/cmd/csaf_aggregator/mirror.go @@ -76,8 +76,7 @@ func (w *worker) mirrorInternal() (*csaf.AggregatorCSAFProvider, error) { w.client, w.expr, w.metadataProvider, - base, - nil) + base) if err := afp.Process(w.mirrorFiles); err != nil { return nil, err diff --git a/cmd/csaf_downloader/config.go b/cmd/csaf_downloader/config.go index 45afcdc8..07e9de64 100644 --- a/cmd/csaf_downloader/config.go +++ b/cmd/csaf_downloader/config.go @@ -11,6 +11,7 @@ package main import ( "net/http" + "github.com/csaf-poc/csaf_distribution/v2/internal/models" "github.com/csaf-poc/csaf_distribution/v2/internal/options" ) @@ -20,13 +21,14 @@ const ( ) type config struct { - Directory *string `short:"d" long:"directory" description:"DIRectory to store the downloaded files in" value-name:"DIR" toml:"directory"` - Insecure bool `long:"insecure" description:"Do not check TLS certificates from provider" toml:"insecure"` - IgnoreSignatureCheck bool `long:"ignoresigcheck" description:"Ignore signature check results, just warn on mismatch" toml:"ignoresigcheck"` - Version bool `long:"version" description:"Display version of the binary" toml:"-"` - Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"` - Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"` - Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"` + Directory *string `short:"d" long:"directory" description:"DIRectory to store the downloaded files in" value-name:"DIR" toml:"directory"` + Insecure bool `long:"insecure" description:"Do not check TLS certificates from provider" toml:"insecure"` + IgnoreSignatureCheck bool `long:"ignoresigcheck" description:"Ignore signature check results, just warn on mismatch" 
toml:"ignoresigcheck"` + Version bool `long:"version" description:"Display version of the binary" toml:"-"` + Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"` + Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"` + Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"` + Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"` ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"` diff --git a/cmd/csaf_downloader/downloader.go b/cmd/csaf_downloader/downloader.go index 41bd0768..cabe2fe3 100644 --- a/cmd/csaf_downloader/downloader.go +++ b/cmd/csaf_downloader/downloader.go @@ -150,8 +150,12 @@ func (d *downloader) download(ctx context.Context, domain string) error { client, d.eval, lpmd.Document, - base, - nil) + base) + + // Do we need time range based filtering? + if d.cfg.Range != nil { + afp.AgeAccept = d.cfg.Range.Contains + } return afp.Process(func(label csaf.TLPLabel, files []csaf.AdvisoryFile) error { return d.downloadFiles(ctx, label, files) diff --git a/csaf/advisories.go b/csaf/advisories.go index a371b659..26ff8509 100644 --- a/csaf/advisories.go +++ b/csaf/advisories.go @@ -9,11 +9,14 @@ package csaf import ( - "bufio" + "encoding/csv" + "fmt" + "io" "log" "net/http" "net/url" "strings" + "time" "github.com/csaf-poc/csaf_distribution/v2/util" ) @@ -71,11 +74,12 @@ func (haf HashedAdvisoryFile) SignURL() string { return haf.name(3, ".asc") } // AdvisoryFileProcessor implements the extraction of // advisory file names from a given provider metadata. 
type AdvisoryFileProcessor struct { - client util.Client - expr *util.PathEval - doc any - base *url.URL - log func(format string, args ...any) + AgeAccept func(time.Time) bool + Log func(format string, args ...any) + client util.Client + expr *util.PathEval + doc any + base *url.URL } // NewAdvisoryFileProcessor constructs an filename extractor @@ -85,14 +89,12 @@ func NewAdvisoryFileProcessor( expr *util.PathEval, doc any, base *url.URL, - log func(format string, args ...any), ) *AdvisoryFileProcessor { return &AdvisoryFileProcessor{ client: client, expr: expr, doc: doc, base: base, - log: log, } } @@ -111,7 +113,7 @@ func empty(arr []string) bool { func (afp *AdvisoryFileProcessor) Process( fn func(TLPLabel, []AdvisoryFile) error, ) error { - lg := afp.log + lg := afp.Log if lg == nil { lg = func(format string, args ...any) { log.Printf("AdvisoryFileProcessor.Process: "+format, args...) @@ -173,7 +175,8 @@ func (afp *AdvisoryFileProcessor) Process( continue } - files, err := afp.loadIndex(base, lg) + // Use changes.csv to be able to filter by age. + files, err := afp.loadChanges(base, lg) if err != nil { return err } @@ -186,9 +189,9 @@ func (afp *AdvisoryFileProcessor) Process( return nil } -// loadIndex loads baseURL/index.txt and returns a list of files +// loadChanges loads baseURL/changes.csv and returns a list of files // prefixed by baseURL/. 
-func (afp *AdvisoryFileProcessor) loadIndex(
+func (afp *AdvisoryFileProcessor) loadChanges(
 	baseURL string,
 	lg func(string, ...any),
 ) ([]AdvisoryFile, error) {
@@ -197,29 +200,58 @@ func (afp *AdvisoryFileProcessor) loadIndex(
 	if err != nil {
 		return nil, err
 	}
+	changesURL := base.JoinPath("changes.csv").String()
 
-	indexURL := base.JoinPath("index.txt").String()
-	resp, err := afp.client.Get(indexURL)
+	resp, err := afp.client.Get(changesURL)
 	if err != nil {
 		return nil, err
 	}
+	// Register the close before the status check so the body is released
+	// on every return path, including the non-200 early return below.
 	defer resp.Body.Close()
-	var files []AdvisoryFile
 
-	scanner := bufio.NewScanner(resp.Body)
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("fetching %s failed. Status code %d (%s)",
+			changesURL, resp.StatusCode, resp.Status)
+	}
 
-	for line := 1; scanner.Scan(); line++ {
-		u := scanner.Text()
-		if _, err := url.Parse(u); err != nil {
-			lg("index.txt contains invalid URL %q in line %d", u, line)
+	var files []AdvisoryFile
+	c := csv.NewReader(resp.Body)
+	// Each record is read as the advisory path followed by its
+	// RFC 3339 change time.
+	const (
+		pathColumn = 0
+		timeColumn = 1
+	)
+	for line := 1; ; line++ {
+		r, err := c.Read()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, err
+		}
+		// Skip (and report) records too short to hold both columns.
+		if len(r) < 2 {
+			lg("%q has not enough columns in line %d", changesURL, line)
+			continue
+		}
+		t, err := time.Parse(time.RFC3339, r[timeColumn])
+		if err != nil {
+			lg("%q has an invalid time stamp in line %d: %v", changesURL, line, err)
+			continue
+		}
+		// Apply date range filtering.
+		if afp.AgeAccept != nil && !afp.AgeAccept(t) {
+			continue
+		}
+		path := r[pathColumn]
+		if _, err := url.Parse(path); err != nil {
+			lg("%q contains an invalid URL %q in line %d", changesURL, path, line)
 			continue
 		}
 		files = append(files,
-			PlainAdvisoryFile(base.JoinPath(u).String()))
-	}
-
-	if err := scanner.Err(); err != nil {
-		return nil, err
+			PlainAdvisoryFile(base.JoinPath(path).String()))
 	}
 	return files, nil
 }
@@ -287,6 +319,13 @@ func (afp *AdvisoryFileProcessor) processROLIE(
 		rfeed.Entries(func(entry *Entry) {
+			// Filter if we have date checking.
+ if afp.AgeAccept != nil { + if pub := time.Time(entry.Published); !pub.IsZero() && !afp.AgeAccept(pub) { + return + } + } + var self, sha256, sha512, sign string for i := range entry.Link { diff --git a/docs/csaf_downloader.md b/docs/csaf_downloader.md index 250d53ba..8874df13 100644 --- a/docs/csaf_downloader.md +++ b/docs/csaf_downloader.md @@ -14,6 +14,7 @@ Application Options: -v, --verbose Verbose output -r, --rate= The average upper limit of https operations per second (defaults to unlimited) -w, --worker=NUM NUMber of concurrent downloads (default: 2) + -t, --timerange=RANGE RANGE of time from which advisories to download -H, --header= One or more extra HTTP header fields --validator=URL URL to validate documents remotely --validatorcache=FILE FILE to cache remote validations @@ -54,4 +55,34 @@ worker = 2 # validator # not set by default # validatorcache # not set by default validatorpreset = ["mandatory"] +# timerange # not set by default ``` + +The `timerange` parameter enables downloading advisories whose last change falls +into a given interval. There are three possible notations: + +1. Relative. If the given string follows the rules of being a [Go duration](https://pkg.go.dev/time@go1.20.6#ParseDuration) + the time interval from now minus that duration till now is used. + E.g. `"3h"` means downloading the advisories that have changed in the last three hours. + +2. Absolute. If the given string is an RFC 3339 date timestamp the time interval between + this date and now is used. + E.g. `"2006-01-02"` means that all files between 2006 January 2nd and now are going to be + downloaded. + Accepted patterns are: + - `"2006-01-02T15:04:05Z"` + - `"2006-01-02T15:04:05+07:00"` + - `"2006-01-02T15:04:05-07:00"` + - `"2006-01-02T15:04:05"` + - `"2006-01-02T15:04"` + - `"2006-01-02T15"` + - `"2006-01-02"` + - `"2006-01"` + - `"2006"` + + Missing parts are set to the smallest value possible in that field. + +3. Range. 
Same as 2 but separated by a `,` to span an interval. E.g. `2019,2024` + spans an interval from 1st January 2019 to the 1st January of 2024. + +All interval boundaries are inclusive. diff --git a/internal/models/models.go b/internal/models/models.go new file mode 100644 index 00000000..a7f6b02a --- /dev/null +++ b/internal/models/models.go @@ -0,0 +1,92 @@ +// This file is Free Software under the MIT License +// without warranty, see README.md and LICENSES/MIT.txt for details. +// +// SPDX-License-Identifier: MIT +// +// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) +// Software-Engineering: 2023 Intevation GmbH + +// Package models contains helper models used in the tools internally. +package models + +import ( + "fmt" + "strings" + "time" +) + +// TimeRange is a time interval. +type TimeRange [2]time.Time + +// NewTimeInterval creates a new time range. +// The time values will be sorted. +func NewTimeInterval(a, b time.Time) TimeRange { + if b.Before(a) { + a, b = b, a + } + return TimeRange{a, b} +} + +// guessDate tries to guess an RFC 3339 date time from a given string. +func guessDate(s string) (time.Time, bool) { + for _, layout := range []string{ + "2006-01-02T15:04:05Z07:00", + "2006-01-02T15:04:05", + "2006-01-02T15:04", + "2006-01-02T15", + "2006-01-02", + "2006-01", + "2006", + } { + if t, err := time.Parse(layout, s); err == nil { + return t, true + } + } + return time.Time{}, false +} + +// UnmarshalText implements [encoding.TextUnmarshaler]. +func (tr *TimeRange) UnmarshalText(text []byte) error { + return tr.UnmarshalFlag(string(text)) +} + +// UnmarshalFlag implements [go-flags/Unmarshaler]. +func (tr *TimeRange) UnmarshalFlag(s string) error { + s = strings.TrimSpace(s) + + // Handle relative case first. 
+ if duration, err := time.ParseDuration(s); err == nil { + now := time.Now() + *tr = NewTimeInterval(now.Add(-duration), now) + return nil + } + + a, b, found := strings.Cut(s, ",") + a, b = strings.TrimSpace(a), strings.TrimSpace(b) + + // Only start date? + if !found { + start, ok := guessDate(a) + if !ok { + return fmt.Errorf("%q is not a valid RFC date time", a) + } + *tr = NewTimeInterval(start, time.Now()) + return nil + } + // Real interval + start, ok := guessDate(a) + if !ok { + return fmt.Errorf("%q is not a valid RFC date time", a) + } + end, ok := guessDate(b) + if !ok { + return fmt.Errorf("%q is not a valid RFC date time", b) + } + *tr = NewTimeInterval(start, end) + return nil +} + +// Contains returns true if the given time is inside this time interval. +func (tr TimeRange) Contains(t time.Time) bool { + return !(t.Before(tr[0]) || t.After(tr[1])) +}