Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Downloader: Make time range configurable to download advisories from #413

Merged
merged 13 commits into from
Jul 28, 2023
3 changes: 1 addition & 2 deletions cmd/csaf_aggregator/mirror.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,7 @@ func (w *worker) mirrorInternal() (*csaf.AggregatorCSAFProvider, error) {
w.client,
w.expr,
w.metadataProvider,
base,
nil)
base)

if err := afp.Process(w.mirrorFiles); err != nil {
return nil, err
Expand Down
16 changes: 9 additions & 7 deletions cmd/csaf_downloader/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package main
import (
"net/http"

"github.com/csaf-poc/csaf_distribution/v2/internal/models"
"github.com/csaf-poc/csaf_distribution/v2/internal/options"
)

Expand All @@ -20,13 +21,14 @@ const (
)

type config struct {
Directory *string `short:"d" long:"directory" description:"DIRectory to store the downloaded files in" value-name:"DIR" toml:"directory"`
Insecure bool `long:"insecure" description:"Do not check TLS certificates from provider" toml:"insecure"`
IgnoreSignatureCheck bool `long:"ignoresigcheck" description:"Ignore signature check results, just warn on mismatch" toml:"ignoresigcheck"`
Version bool `long:"version" description:"Display version of the binary" toml:"-"`
Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"`
Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"`
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
Directory *string `short:"d" long:"directory" description:"DIRectory to store the downloaded files in" value-name:"DIR" toml:"directory"`
Insecure bool `long:"insecure" description:"Do not check TLS certificates from provider" toml:"insecure"`
IgnoreSignatureCheck bool `long:"ignoresigcheck" description:"Ignore signature check results, just warn on mismatch" toml:"ignoresigcheck"`
Version bool `long:"version" description:"Display version of the binary" toml:"-"`
Verbose bool `long:"verbose" short:"v" description:"Verbose output" toml:"verbose"`
Rate *float64 `long:"rate" short:"r" description:"The average upper limit of https operations per second (defaults to unlimited)" toml:"rate"`
Worker int `long:"worker" short:"w" description:"NUMber of concurrent downloads" value-name:"NUM" toml:"worker"`
Range *models.TimeRange `long:"timerange" short:"t" description:"RANGE of time from which advisories to download" value-name:"RANGE" toml:"timerange"`

ExtraHeader http.Header `long:"header" short:"H" description:"One or more extra HTTP header fields" toml:"header"`

Expand Down
8 changes: 6 additions & 2 deletions cmd/csaf_downloader/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,12 @@ func (d *downloader) download(ctx context.Context, domain string) error {
client,
d.eval,
lpmd.Document,
base,
nil)
base)

// Do we need time range based filtering?
if d.cfg.Range != nil {
afp.AgeAccept = d.cfg.Range.Contains
}

return afp.Process(func(label csaf.TLPLabel, files []csaf.AdvisoryFile) error {
return d.downloadFiles(ctx, label, files)
Expand Down
86 changes: 60 additions & 26 deletions csaf/advisories.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,14 @@
package csaf

import (
"bufio"
"encoding/csv"
"fmt"
"io"
"log"
"net/http"
"net/url"
"strings"
"time"

"github.com/csaf-poc/csaf_distribution/v2/util"
)
Expand Down Expand Up @@ -71,11 +74,12 @@ func (haf HashedAdvisoryFile) SignURL() string { return haf.name(3, ".asc") }
// AdvisoryFileProcessor implements the extraction of
// advisory file names from a given provider metadata.
type AdvisoryFileProcessor struct {
client util.Client
expr *util.PathEval
doc any
base *url.URL
log func(format string, args ...any)
AgeAccept func(time.Time) bool
Log func(format string, args ...any)
client util.Client
expr *util.PathEval
doc any
base *url.URL
}

// NewAdvisoryFileProcessor constructs a filename extractor
Expand All @@ -85,14 +89,12 @@ func NewAdvisoryFileProcessor(
expr *util.PathEval,
doc any,
base *url.URL,
log func(format string, args ...any),
) *AdvisoryFileProcessor {
return &AdvisoryFileProcessor{
client: client,
expr: expr,
doc: doc,
base: base,
log: log,
}
}

Expand All @@ -111,7 +113,7 @@ func empty(arr []string) bool {
func (afp *AdvisoryFileProcessor) Process(
fn func(TLPLabel, []AdvisoryFile) error,
) error {
lg := afp.log
lg := afp.Log
if lg == nil {
lg = func(format string, args ...any) {
log.Printf("AdvisoryFileProcessor.Process: "+format, args...)
Expand Down Expand Up @@ -173,7 +175,8 @@ func (afp *AdvisoryFileProcessor) Process(
continue
}

files, err := afp.loadIndex(base, lg)
// Use changes.csv to be able to filter by age.
files, err := afp.loadChanges(base, lg)
if err != nil {
return err
}
Expand All @@ -186,9 +189,9 @@ func (afp *AdvisoryFileProcessor) Process(
return nil
}

// loadIndex loads baseURL/index.txt and returns a list of files
// loadChanges loads baseURL/changes.csv and returns a list of files
// prefixed by baseURL/.
func (afp *AdvisoryFileProcessor) loadIndex(
func (afp *AdvisoryFileProcessor) loadChanges(
baseURL string,
lg func(string, ...any),
) ([]AdvisoryFile, error) {
Expand All @@ -197,29 +200,53 @@ func (afp *AdvisoryFileProcessor) loadIndex(
if err != nil {
return nil, err
}
changesURL := base.JoinPath("changes.csv").String()

indexURL := base.JoinPath("index.txt").String()
resp, err := afp.client.Get(indexURL)
resp, err := afp.client.Get(changesURL)
if err != nil {
return nil, err
}
defer resp.Body.Close()
var files []AdvisoryFile

scanner := bufio.NewScanner(resp.Body)
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("fetching %s failed. Status code %d (%s)",
changesURL, resp.StatusCode, resp.Status)
}

for line := 1; scanner.Scan(); line++ {
u := scanner.Text()
if _, err := url.Parse(u); err != nil {
lg("index.txt contains invalid URL %q in line %d", u, line)
defer resp.Body.Close()
var files []AdvisoryFile
c := csv.NewReader(resp.Body)
const (
pathColumn = 0
timeColumn = 1
)
for line := 1; ; line++ {
r, err := c.Read()
if err == io.EOF {
break
}
if err != nil {
return nil, err
}
if len(r) < 2 {
	// Both the path and the time stamp column are required.
	// The message needs the URL for %q and the line number for %d;
	// passing only the line number garbles the log output.
	lg("%q has not enough columns in line %d", changesURL, line)
	continue
}
t, err := time.Parse(time.RFC3339, r[timeColumn])
if err != nil {
lg("%q has an invalid time stamp in line %d: %v", changesURL, line, err)
continue
}
// Apply date range filtering.
if afp.AgeAccept != nil && !afp.AgeAccept(t) {
continue
}
path := r[pathColumn]
if _, err := url.Parse(path); err != nil {
lg("%q contains an invalid URL %q in line %d", changesURL, path, line)
continue
}
files = append(files,
PlainAdvisoryFile(base.JoinPath(u).String()))
}

if err := scanner.Err(); err != nil {
return nil, err
PlainAdvisoryFile(base.JoinPath(path).String()))
}
return files, nil
}
Expand Down Expand Up @@ -287,6 +314,13 @@ func (afp *AdvisoryFileProcessor) processROLIE(

rfeed.Entries(func(entry *Entry) {

// Filter if we have date checking.
if afp.AgeAccept != nil {
if pub := time.Time(entry.Published); !pub.IsZero() && !afp.AgeAccept(pub) {
return
}
}

var self, sha256, sha512, sign string

for i := range entry.Link {
Expand Down
31 changes: 31 additions & 0 deletions docs/csaf_downloader.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Application Options:
-v, --verbose Verbose output
-r, --rate= The average upper limit of https operations per second (defaults to unlimited)
-w, --worker=NUM NUMber of concurrent downloads (default: 2)
-t, --timerange=RANGE RANGE of time from which advisories to download
-H, --header= One or more extra HTTP header fields
--validator=URL URL to validate documents remotely
--validatorcache=FILE FILE to cache remote validations
Expand Down Expand Up @@ -54,4 +55,34 @@ worker = 2
# validator # not set by default
# validatorcache # not set by default
validatorpreset = ["mandatory"]
# timerange # not set by default
```

The `timerange` parameter restricts the download to advisories whose last change falls
into a given interval. There are three possible notations:

1. Relative. If the given string is a valid [Go duration](https://pkg.go.dev/time#ParseDuration),
the time interval from now minus that duration until now is used.
E.g. `"3h"` means downloading the advisories that have changed in the last three hours.

2. Absolute. If the given string is an RFC 3339 date timestamp, the time interval between
this date and now is used.
E.g. `"2006-01-02"` means that all files changed between January 2nd, 2006 and now are going to be
downloaded.
Accepted patterns are:
- `"2006-01-02T15:04:05Z"`
- `"2006-01-02T15:04:05+07:00"`
- `"2006-01-02T15:04:05-07:00"`
- `"2006-01-02T15:04:05"`
- `"2006-01-02T15:04"`
- `"2006-01-02T15"`
- `"2006-01-02"`
- `"2006-01"`
- `"2006"`

Missing parts are set to the smallest value possible in that field.

3. Range. Two timestamps as in 2, separated by a `,`, span an interval. E.g. `2019,2024`
spans an interval from 1st January 2019 to 1st January 2024.

All interval boundaries are inclusive.
92 changes: 92 additions & 0 deletions internal/models/models.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// This file is Free Software under the MIT License
// without warranty, see README.md and LICENSES/MIT.txt for details.
//
// SPDX-License-Identifier: MIT
//
// SPDX-FileCopyrightText: 2023 German Federal Office for Information Security (BSI) <https://www.bsi.bund.de>
// Software-Engineering: 2023 Intevation GmbH <https://intevation.de>

// Package models contains helper models used in the tools internally.
package models

import (
"fmt"
"strings"
"time"
)

// TimeRange is a time interval made of two time stamps.
// Contains treats index 0 as the lower and index 1 as the upper
// boundary; NewTimeInterval stores the values in that order.
type TimeRange [2]time.Time

// NewTimeInterval builds a TimeRange from two points in time.
// The arguments may be given in any order: the earlier one becomes
// the start and the later one the end of the range.
func NewTimeInterval(a, b time.Time) TimeRange {
	if a.After(b) {
		return TimeRange{b, a}
	}
	return TimeRange{a, b}
}

// guessDate tries to interpret s as an RFC 3339 style date time.
// Besides the full notation with a zone, shortened forms down to a
// bare year are accepted. The layouts are tried from the most to the
// least specific one and the first that parses wins.
// The boolean result reports whether a layout matched; if none did,
// the zero time is returned.
func guessDate(s string) (time.Time, bool) {
	layouts := [...]string{
		"2006-01-02T15:04:05Z07:00",
		"2006-01-02T15:04:05",
		"2006-01-02T15:04",
		"2006-01-02T15",
		"2006-01-02",
		"2006-01",
		"2006",
	}
	for i := range layouts {
		parsed, err := time.Parse(layouts[i], s)
		if err != nil {
			continue
		}
		return parsed, true
	}
	return time.Time{}, false
}

// UnmarshalText implements [encoding.TextUnmarshaler].
// The text is converted to a string and handed to UnmarshalFlag,
// so both accept the same notations.
func (tr *TimeRange) UnmarshalText(text []byte) error {
	value := string(text)
	return tr.UnmarshalFlag(value)
}

// UnmarshalFlag implements [go-flags/Unmarshaler].
//
// After trimming surrounding white space, s is interpreted as one of:
//   - a duration understood by time.ParseDuration: the range spans
//     from now minus that duration to now,
//   - a single date accepted by guessDate: the range spans from that
//     date to now,
//   - two such dates separated by a comma: the range spans between them.
//
// The boundaries are always passed through NewTimeInterval, which
// orders them. If a date cannot be parsed, an error is returned and
// the receiver is left untouched.
func (tr *TimeRange) UnmarshalFlag(s string) error {
	s = strings.TrimSpace(s)

	// The relative notation takes precedence over the date notations.
	if d, err := time.ParseDuration(s); err == nil {
		now := time.Now()
		*tr = NewTimeInterval(now.Add(-d), now)
		return nil
	}

	first, second, isRange := strings.Cut(s, ",")
	first, second = strings.TrimSpace(first), strings.TrimSpace(second)

	// Every remaining notation begins with a start date.
	start, ok := guessDate(first)
	if !ok {
		return fmt.Errorf("%q is not a valid RFC date time", first)
	}

	var end time.Time
	if isRange {
		// Explicit interval: the part after the comma is the end date.
		if end, ok = guessDate(second); !ok {
			return fmt.Errorf("%q is not a valid RFC date time", second)
		}
	} else {
		// Only a start date was given: the range is open until now.
		end = time.Now()
	}

	*tr = NewTimeInterval(start, end)
	return nil
}

// Contains reports whether t lies inside this time interval.
// Both boundaries are inclusive: a time equal to the start or
// the end of the interval is considered to be contained.
func (tr TimeRange) Contains(t time.Time) bool {
	start, end := tr[0], tr[1]
	return !t.Before(start) && !t.After(end)
}
Loading