From edf7cadd46cb91a0602fbc72dc65d47261a38884 Mon Sep 17 00:00:00 2001 From: Dan Kortschak Date: Tue, 25 Jun 2024 10:05:51 +0930 Subject: [PATCH] x-pack/filebeat/input/httpjson: add ability to remove request trace logs This is a replay of #39969, but for the HTTPJSON input. The previous configuration system did not allow users to remove trace logs from agents after they are no longer needed. This is potential security risk as it leaves potentially sensitive information on the file system beyond its required lifetime. The mechanism for communicating to the input whether to write logs is not currently powerful enough to indicate that existing logs should be removed without deleting logs from other instances. So add an enabled configuration option to allow the target name to be sent independently of whether the files should be written or removed. The new option is optional, defaulting to the previous behaviour so that it can be opted into via progressive repair in the client integrations. --- CHANGELOG.next.asciidoc | 1 + .../docs/inputs/input-httpjson.asciidoc | 17 +-- .../filebeat/input/httpjson/config_request.go | 11 +- x-pack/filebeat/input/httpjson/input.go | 29 ++++- x-pack/filebeat/input/httpjson/input_test.go | 106 +++++++++++++++++- 5 files changed, 149 insertions(+), 15 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index da63b4565b6..240b21a16f1 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -282,6 +282,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Fix handling of infinite rate values in CEL rate limit handling logic. {pull}39940[39940] - Allow elision of set and append failure logging. {issue}34544[34544] {pull}39929[39929] - Add ability to remove request trace logs from CEL input. {pull}39969[39969] +- Add ability to remove request trace logs from HTTPJSON input. {pull}40003[40003] *Auditbeat* diff --git a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc index 242e2a877c3..21766a515a8 100644 --- a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc @@ -640,17 +640,20 @@ because when pagination does not exist at the parent level `parent_last_response NOTE: The `first_response` object at the moment can only store flat JSON structures (i.e. no support for JSONS having array at root level, NDJSON or Gzipped JSON), hence it should only be used in scenarios where the this is the case. Splits cannot be performed on `first_response`. It needs to be explicitly enabled by setting the flag `response.save_first_response` to `true` in the httpjson config. [float] -==== `request.tracer.filename` - -It is possible to log httpjson requests and responses to a local file-system for debugging configurations. -This option is enabled by setting the `request.tracer.filename` value. Additional options are available to -tune log rotation behavior. +==== `request.tracer.enable` -To differentiate the trace files generated from different input instances, a placeholder `*` can be added to the filename and will be replaced with the input instance id. -For Example, `http-request-trace-*.ndjson`. +It is possible to log HTTP requests and responses to a local file-system for debugging configurations. +This option is enabled by setting `request.tracer.enabled` to true and setting the `request.tracer.filename` value. +Additional options are available to tune log rotation behavior. To delete existing logs, set `request.tracer.enabled` +to false without unsetting the filename option. Enabling this option compromises security and should only be used for debugging. +==== `request.tracer.filename` + +To differentiate the trace files generated from different input instances, a placeholder `*` can be added to the +filename and will be replaced with the input instance id. For Example, `http-request-trace-*.ndjson`. + [float] ==== `request.tracer.maxsize` diff --git a/x-pack/filebeat/input/httpjson/config_request.go b/x-pack/filebeat/input/httpjson/config_request.go index e1238898651..34f62a1687a 100644 --- a/x-pack/filebeat/input/httpjson/config_request.go +++ b/x-pack/filebeat/input/httpjson/config_request.go @@ -136,7 +136,16 @@ type requestConfig struct { Transport httpcommon.HTTPTransportSettings `config:",inline"` - Tracer *lumberjack.Logger `config:"tracer"` + Tracer *tracerConfig `config:"tracer"` +} + +type tracerConfig struct { + Enabled *bool `config:"enabled"` + lumberjack.Logger `config:",inline"` +} + +func (t *tracerConfig) enabled() bool { + return t != nil && (t.Enabled == nil || *t.Enabled) } func (c *requestConfig) Validate() error { diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index cb91723f464..67daa7ef849 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -7,10 +7,13 @@ package httpjson import ( "context" "encoding/json" + "errors" "fmt" + "io/fs" "net" "net/http" "net/url" + "os" "path/filepath" "strings" "time" @@ -239,13 +242,18 @@ func newHTTPClient(ctx context.Context, config config, log *logp.Logger, reg *mo return &httpClient{client: client, limiter: limiter}, nil } +// lumberjackTimestamp is a glob expression matching the time format string used +// by lumberjack when rolling over logs, "2006-01-02T15-04-05.000". +// https://github.com/natefinch/lumberjack/blob/4cb27fcfbb0f35cb48c542c5ea80b7c1d18933d0/lumberjack.go#L39 +const lumberjackTimestamp = "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]T[0-9][0-9]-[0-9][0-9]-[0-9][0-9].[0-9][0-9][0-9]" + func newNetHTTPClient(ctx context.Context, cfg *requestConfig, log *logp.Logger, reg *monitoring.Registry) (*http.Client, error) { netHTTPClient, err := cfg.Transport.Client(clientOptions(cfg.URL.URL, cfg.KeepAlive.settings())...) if err != nil { return nil, err } - if cfg.Tracer != nil { + if cfg.Tracer.enabled() { w := zapcore.AddSync(cfg.Tracer) go func() { // Close the logger when we are done. @@ -265,6 +273,25 @@ func newNetHTTPClient(ctx context.Context, cfg *requestConfig, log *logp.Logger, maxSize = 0 } netHTTPClient.Transport = httplog.NewLoggingRoundTripper(netHTTPClient.Transport, traceLogger, maxSize, log) + } else if cfg.Tracer != nil { + // We have a trace log name, but we are not enabled, + // so remove all trace logs we own. + err = os.Remove(cfg.Tracer.Filename) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + log.Errorw("failed to remove request trace log", "path", cfg.Tracer.Filename, "error", err) + } + ext := filepath.Ext(cfg.Tracer.Filename) + base := strings.TrimSuffix(cfg.Tracer.Filename, ext) + paths, err := filepath.Glob(base + "-" + lumberjackTimestamp + ext) + if err != nil { + log.Errorw("failed to collect request trace log path names", "error", err) + } + for _, p := range paths { + err = os.Remove(p) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + log.Errorw("failed to remove request trace log", "path", p, "error", err) + } + } } if reg != nil { diff --git a/x-pack/filebeat/input/httpjson/input_test.go b/x-pack/filebeat/input/httpjson/input_test.go index 458b4becee6..dac538b54dd 100644 --- a/x-pack/filebeat/input/httpjson/input_test.go +++ b/x-pack/filebeat/input/httpjson/input_test.go @@ -27,12 +27,13 @@ import ( ) var testCases = []struct { - name string - setupServer func(testing.TB, http.HandlerFunc, map[string]interface{}) - baseConfig map[string]interface{} - handler http.HandlerFunc - expected []string - expectedFile string + name string + setupServer func(testing.TB, http.HandlerFunc, map[string]interface{}) + baseConfig map[string]interface{} + handler http.HandlerFunc + expected []string + expectedFile string + expectedNoFile string skipReason string }{ @@ -347,6 +348,90 @@ var testCases = []struct { }, expectedFile: filepath.Join("logs", "http-request-trace-httpjson-foo-eb837d4c-5ced-45ed-b05c-de658135e248_https_somesource_someapi.ndjson"), }, + { + name: "tracer_filename_sanitization_enabled", + setupServer: func(t testing.TB, h http.HandlerFunc, config map[string]interface{}) { + // mock timeNow func to return a fixed value + timeNow = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z") + return t + } + + server := httptest.NewServer(h) + config["request.url"] = server.URL + t.Cleanup(server.Close) + t.Cleanup(func() { timeNow = time.Now }) + }, + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": http.MethodGet, + "request.transforms": []interface{}{ + map[string]interface{}{ + "set": map[string]interface{}{ + "target": "url.params.$filter", + "value": "alertCreationTime ge [[.cursor.timestamp]]", + "default": `alertCreationTime ge [[formatDate (now (parseDuration "-10m")) "2006-01-02T15:04:05Z"]]`, + }, + }, + }, + "cursor": map[string]interface{}{ + "timestamp": map[string]interface{}{ + "value": `[[index .last_response.body "@timestamp"]]`, + }, + }, + "request.tracer.enabled": true, + "request.tracer.filename": "logs/http-request-trace-*.ndjson", + }, + handler: dateCursorHandler(), + expected: []string{ + `{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`, + }, + expectedFile: filepath.Join("logs", "http-request-trace-httpjson-foo-eb837d4c-5ced-45ed-b05c-de658135e248_https_somesource_someapi.ndjson"), + }, + { + name: "tracer_filename_sanitization_disabled", + setupServer: func(t testing.TB, h http.HandlerFunc, config map[string]interface{}) { + // mock timeNow func to return a fixed value + timeNow = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z") + return t + } + + server := httptest.NewServer(h) + config["request.url"] = server.URL + t.Cleanup(server.Close) + t.Cleanup(func() { timeNow = time.Now }) + }, + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": http.MethodGet, + "request.transforms": []interface{}{ + map[string]interface{}{ + "set": map[string]interface{}{ + "target": "url.params.$filter", + "value": "alertCreationTime ge [[.cursor.timestamp]]", + "default": `alertCreationTime ge [[formatDate (now (parseDuration "-10m")) "2006-01-02T15:04:05Z"]]`, + }, + }, + }, + "cursor": map[string]interface{}{ + "timestamp": map[string]interface{}{ + "value": `[[index .last_response.body "@timestamp"]]`, + }, + }, + "request.tracer.enabled": false, + "request.tracer.filename": "logs/http-request-trace-*.ndjson", + }, + handler: dateCursorHandler(), + expected: []string{ + `{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`, + }, + expectedNoFile: filepath.Join("logs", "http-request-trace-httpjson-foo-eb837d4c-5ced-45ed-b05c-de658135e248_https_somesource_someapi*"), + }, { name: "pagination", setupServer: func(t testing.TB, h http.HandlerFunc, config map[string]interface{}) { @@ -1424,6 +1509,15 @@ func TestInput(t *testing.T) { t.Errorf("Expected log filename not found") } } + if test.expectedNoFile != "" { + paths, err := filepath.Glob(filepath.Join(tempDir, test.expectedNoFile)) + if err != nil { + t.Fatalf("unexpected error calling filepath.Glob(%q): %v", test.expectedNoFile, err) + } + if len(paths) != 0 { + t.Errorf("unexpected files found: %v", paths) + } + } assert.NoError(t, g.Wait()) }) }