Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

loki.secretfilter: utilize fuzz testing for complex input #2630

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
20 changes: 12 additions & 8 deletions .github/workflows/fuzz-go.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
inputs:
directory:
description: "Directory to search for Go fuzz tests in."
default: '.'
default: "."
required: false
type: string
fuzz-time:
Expand Down Expand Up @@ -33,15 +33,20 @@ jobs:
RESULTS=()

for FILE in $TEST_FILES; do
FUZZ_FUNC=$(grep -E 'func Fuzz\w*' $FILE | sed 's/func //' | sed 's/(.*$//')
if [ -z "$FUZZ_FUNC" ]; then
FUZZ_FUNCS=$(grep -E 'func Fuzz\w*' $FILE | sed 's/func //' | sed 's/(.*$//')
if [ -z "$FUZZ_FUNCS" ]; then
continue
fi

PACKAGE_PATH=$(dirname ${FILE#${{ inputs.directory }}/})
RESULTS+=("{\"package\":\"$PACKAGE_PATH\",\"function\":\"$FUZZ_FUNC\"}")

echo "Found $FUZZ_FUNC in $PACKAGE_PATH"
for FUZZ_FUNC in $FUZZ_FUNCS; do
if [ -z "$FUZZ_FUNC" ]; then
continue
fi

RESULTS+=("{\"package\":\"$PACKAGE_PATH\",\"function\":\"$FUZZ_FUNC\"}")
echo "Found $FUZZ_FUNC in $PACKAGE_PATH"
done
done

NUM_RESULTS=${#RESULTS[@]}
Expand Down Expand Up @@ -74,8 +79,7 @@ jobs:
cache: false

- name: Find cache location
run:
echo "FUZZ_CACHE=$(go env GOCACHE)/fuzz" >> $GITHUB_ENV
run: echo "FUZZ_CACHE=$(go env GOCACHE)/fuzz" >> $GITHUB_ENV

- name: Restore fuzz cache
uses: actions/cache@v4
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ Main (unreleased)

- Fixed an issue where `loki.process` would sometimes output live debugging entries out-of-order (@thampiotr)

- (_Experimental_) Fixed several out-of-memory bugs in `loki.secretfilter`. (@kelnage)

v1.6.1
-----------------

Expand Down
14 changes: 14 additions & 0 deletions internal/component/loki/secretfilter/secretfilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,12 @@ func (c *Component) processEntry(entry loki.Entry) loki.Entry {
secret = occ[1]
}

// If secret is empty string, ignore
if secret == "" {
level.Debug(c.opts.Logger).Log("msg", "empty secret found", "rule", r.name)
continue
}

// Check if the secret is in the allowlist
var allowRule *AllowRule = nil
// First check the global allowlist
Expand Down Expand Up @@ -289,6 +295,10 @@ func (c *Component) Update(args component.Arguments) error {

// Compile regexes
for _, rule := range gitleaksCfg.Rules {
// If the rule regex is empty, skip this rule
if rule.Regex == "" {
continue
}
// If specific secret types are provided, only include rules that match the types
if len(c.args.Types) > 0 {
var found bool
Expand All @@ -308,6 +318,10 @@ func (c *Component) Update(args component.Arguments) error {
level.Error(c.opts.Logger).Log("msg", "error compiling regex", "error", err)
return err
}
// If the rule regex matches the empty string, skip this rule
if re.Match([]byte("")) {
continue
}

// Compile rule-specific allowlist regexes
var allowlist []AllowRule
Expand Down
127 changes: 127 additions & 0 deletions internal/component/loki/secretfilter/secretfilter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,133 @@ func runBenchmarks(b *testing.B, config string, percentageSecrets int, secretNam
}
}

var sampleFuzzLogLines = []string{
`key=value1,value2 log=fmt test=1 secret=password`,
`{"key":["value1","value2"],"log":"fmt","test":1,"secret":"password"}`,
`1970-01-01 00:00:00 pattern value1,value2 1 secret`,
}

func FuzzProcessEntry(f *testing.F) {
for _, line := range sampleFuzzLogLines {
f.Add(line)
}
for _, testLog := range testLogs {
f.Add(testLog.log)
}

comps := make([]*Component, 0, len(testConfigs))
opts := component.Options{
Logger: util.TestLogger(f),
OnStateChange: func(e component.Exports) {},
GetServiceData: getServiceData,
}
ch1 := loki.NewLogsReceiver()

// Create components
for _, config := range testConfigs {
var args Arguments
require.NoError(f, syntax.Unmarshal([]byte(config), &args))
if args.GitleaksConfig != "" {
continue // Skip the configs using a custom gitleaks config file
}

args.ForwardTo = []loki.LogsReceiver{ch1}
c, err := New(opts, args)
require.NoError(f, err)
comps = append(comps, c)
}

f.Fuzz(func(t *testing.T, log string) {
for _, c := range comps {
entry := loki.Entry{Labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Now(), Line: log}}
c.processEntry(entry)
}
})
}

func FuzzConfig(f *testing.F) {
for _, testConf := range testConfigs {
for _, testLog := range testLogs {
f.Add(testConf, testLog.log)
}
}
opts := component.Options{
Logger: util.TestLogger(f),
OnStateChange: func(e component.Exports) {},
GetServiceData: getServiceData,
}
ch1 := loki.NewLogsReceiver()

f.Fuzz(func(t *testing.T, config string, log string) {
var args Arguments
err := syntax.Unmarshal([]byte(config), &args)
if err != nil {
// ignore parsing errors, as we aren't fuzz testing the Alloy config parser
return
}
if args.GitleaksConfig != "" {
return // Skip the configs that use a custom gitleaks config file
}

args.ForwardTo = []loki.LogsReceiver{ch1}
c, err := New(opts, args)
if err != nil {
// ignore regex parsing errors
if strings.HasPrefix(err.Error(), "error parsing regexp") {
return
}
t.Errorf("error configuring component: %v", err)
}

entry := loki.Entry{Labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Now(), Line: log}}
c.processEntry(entry)
})
}

func FuzzGitleaksConfig(f *testing.F) {
for _, testConf := range testConfigs {
for _, testGitleaksConf := range customGitleaksConfig {
for _, testLog := range testLogs {
f.Add(testConf, testGitleaksConf, testLog.log)
}
}
}
opts := component.Options{
Logger: util.TestLogger(f),
OnStateChange: func(e component.Exports) {},
GetServiceData: getServiceData,
}
ch1 := loki.NewLogsReceiver()

f.Fuzz(func(t *testing.T, config string, gitleaksConfig string, log string) {
var args Arguments
err := syntax.Unmarshal([]byte(config), &args)
if err != nil {
// ignore parsing errors, as we aren't fuzz testing the Alloy config parser
return
Comment on lines +707 to +708
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even if we're not fuzz testing the config parser, wouldn't we still want to know if this fails? Presumably it should succeed so that we can test other code? And if it fails silently, it could mean the fuzz tests didn't really run?

I guess that testConfigs contains some invalid configurations and we want to ignore them, but wouldn't it be cleaner to only have valid configs as input and to not have config as an input to f.Fuzz?

}
args.GitleaksConfig = createTempGitleaksConfig(t, gitleaksConfig)
defer deleteTempGitLeaksConfig(t, args.GitleaksConfig)

args.ForwardTo = []loki.LogsReceiver{ch1}
c, err := New(opts, args)
if err != nil {
// ignore regex parsing errors - out of scope
if strings.HasPrefix(err.Error(), "error parsing regexp") {
return
}
// ignore toml parsing errors - out of scope
if strings.HasPrefix(err.Error(), "toml:") {
return
}
t.Errorf("error configuring component: %v", err)
}

entry := loki.Entry{Labels: model.LabelSet{}, Entry: logproto.Entry{Timestamp: time.Now(), Line: log}}
c.processEntry(entry)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we not also check the output on ch1?

})
}

func getServiceData(name string) (interface{}, error) {
switch name {
case livedebugging.ServiceName:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
go test fuzz v1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if these are needed, and would be happy to hear other reviewers' opinions. I guess including them as test cases are enough.

string("forward_to=[]")
string("\n\t\ttitle = \"gitleaks custom config\"\n\n\t\t[[rules]]\n\t\tid = \"my-fake-secret\"\n\t\tdescription = \"Identified a fake secret\"\n\t\t")
string("{\n\t\t\t\"message\": \"This is a simple log message with a secret value AIzaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA !\n\t\t}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
go test fuzz v1
string("\n\t\tforward_to = []\n\t\tredact_with = \"<ALLOY-REDACTED-SECR<ALLOY-ET:$SECRET_NAME>\"\n\t")
string("\n\t\ttitle = \"gitleaks custom config\"\n\n\t\t[[rules]]\n\t\tid = \"my-fake-secret\"\n\t\tdescription = \"Identified a fake secret\"\n\t\tregex = '''(?i)\\b(fakeSecret\\d\", \"faiption ='|\\\"|\\n|\\r||;]|$)'''\t\n\t\t[allowlist]\n\t\tregexes = [\"abc\\\\d{3}\", \"fakeSecret[9]{5}\"]\n\t")
Comment on lines +2 to +3
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm new to fuzz testing - what sorts of variations of these configs is Go actually going to come up with? E.g. if it just puts in some random malformed string as a config, how do we know we should reject it? And even for secrets, how can we make sure in our test that we filter the secrets we want to filter?

string("{\n\t\t\t\"message\": \"This is a simple log message with a secret value fakeSecret12345 !\n\t\t}")
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
go test fuzz v1
string("forward_to=[]")
string("[[rules]]\nd=\"\"\ne=\"\"\nregex='''()'''\n[[rules.allowlista]]\ns=[\"\\\\\",\"\\\\\"]\n[[rules.allowlists]]\nregexes=[\"\\\\{\",\"]\"\"")
string("{\n\t\t\t\"message\": \"This is a simple log message with a secret value eyJrIjoiAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA and another secret value AIzaAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA !\n\t\t}")
Loading