diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index da63b4565b6..240b21a16f1 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -282,6 +282,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Fix handling of infinite rate values in CEL rate limit handling logic. {pull}39940[39940] - Allow elision of set and append failure logging. {issue}34544[34544] {pull}39929[39929] - Add ability to remove request trace logs from CEL input. {pull}39969[39969] +- Add ability to remove request trace logs from HTTPJSON input. {pull}40003[40003] *Auditbeat* diff --git a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc index 242e2a877c3..21766a515a8 100644 --- a/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc +++ b/x-pack/filebeat/docs/inputs/input-httpjson.asciidoc @@ -640,17 +640,20 @@ because when pagination does not exist at the parent level `parent_last_response NOTE: The `first_response` object at the moment can only store flat JSON structures (i.e. no support for JSONS having array at root level, NDJSON or Gzipped JSON), hence it should only be used in scenarios where the this is the case. Splits cannot be performed on `first_response`. It needs to be explicitly enabled by setting the flag `response.save_first_response` to `true` in the httpjson config. [float] -==== `request.tracer.filename` - -It is possible to log httpjson requests and responses to a local file-system for debugging configurations. -This option is enabled by setting the `request.tracer.filename` value. Additional options are available to -tune log rotation behavior. +==== `request.tracer.enable` -To differentiate the trace files generated from different input instances, a placeholder `*` can be added to the filename and will be replaced with the input instance id. -For Example, `http-request-trace-*.ndjson`. +It is possible to log HTTP requests and responses to a local file-system for debugging configurations. +This option is enabled by setting `request.tracer.enabled` to true and setting the `request.tracer.filename` value. +Additional options are available to tune log rotation behavior. To delete existing logs, set `request.tracer.enabled` +to false without unsetting the filename option. Enabling this option compromises security and should only be used for debugging. +==== `request.tracer.filename` + +To differentiate the trace files generated from different input instances, a placeholder `*` can be added to the +filename and will be replaced with the input instance id. For Example, `http-request-trace-*.ndjson`. + [float] ==== `request.tracer.maxsize` diff --git a/x-pack/filebeat/input/httpjson/config_request.go b/x-pack/filebeat/input/httpjson/config_request.go index e1238898651..34f62a1687a 100644 --- a/x-pack/filebeat/input/httpjson/config_request.go +++ b/x-pack/filebeat/input/httpjson/config_request.go @@ -136,7 +136,16 @@ type requestConfig struct { Transport httpcommon.HTTPTransportSettings `config:",inline"` - Tracer *lumberjack.Logger `config:"tracer"` + Tracer *tracerConfig `config:"tracer"` +} + +type tracerConfig struct { + Enabled *bool `config:"enabled"` + lumberjack.Logger `config:",inline"` +} + +func (t *tracerConfig) enabled() bool { + return t != nil && (t.Enabled == nil || *t.Enabled) } func (c *requestConfig) Validate() error { diff --git a/x-pack/filebeat/input/httpjson/input.go b/x-pack/filebeat/input/httpjson/input.go index cb91723f464..67daa7ef849 100644 --- a/x-pack/filebeat/input/httpjson/input.go +++ b/x-pack/filebeat/input/httpjson/input.go @@ -7,10 +7,13 @@ package httpjson import ( "context" "encoding/json" + "errors" "fmt" + "io/fs" "net" "net/http" "net/url" + "os" "path/filepath" "strings" "time" @@ -239,13 +242,18 @@ func newHTTPClient(ctx context.Context, config config, log *logp.Logger, reg *mo return &httpClient{client: client, limiter: limiter}, nil } +// lumberjackTimestamp is a glob expression matching the time format string used +// by lumberjack when rolling over logs, "2006-01-02T15-04-05.000". +// https://github.com/natefinch/lumberjack/blob/4cb27fcfbb0f35cb48c542c5ea80b7c1d18933d0/lumberjack.go#L39 +const lumberjackTimestamp = "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]T[0-9][0-9]-[0-9][0-9]-[0-9][0-9].[0-9][0-9][0-9]" + func newNetHTTPClient(ctx context.Context, cfg *requestConfig, log *logp.Logger, reg *monitoring.Registry) (*http.Client, error) { netHTTPClient, err := cfg.Transport.Client(clientOptions(cfg.URL.URL, cfg.KeepAlive.settings())...) if err != nil { return nil, err } - if cfg.Tracer != nil { + if cfg.Tracer.enabled() { w := zapcore.AddSync(cfg.Tracer) go func() { // Close the logger when we are done. @@ -265,6 +273,25 @@ func newNetHTTPClient(ctx context.Context, cfg *requestConfig, log *logp.Logger, maxSize = 0 } netHTTPClient.Transport = httplog.NewLoggingRoundTripper(netHTTPClient.Transport, traceLogger, maxSize, log) + } else if cfg.Tracer != nil { + // We have a trace log name, but we are not enabled, + // so remove all trace logs we own. + err = os.Remove(cfg.Tracer.Filename) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + log.Errorw("failed to remove request trace log", "path", cfg.Tracer.Filename, "error", err) + } + ext := filepath.Ext(cfg.Tracer.Filename) + base := strings.TrimSuffix(cfg.Tracer.Filename, ext) + paths, err := filepath.Glob(base + "-" + lumberjackTimestamp + ext) + if err != nil { + log.Errorw("failed to collect request trace log path names", "error", err) + } + for _, p := range paths { + err = os.Remove(p) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + log.Errorw("failed to remove request trace log", "path", p, "error", err) + } + } } if reg != nil { diff --git a/x-pack/filebeat/input/httpjson/input_test.go b/x-pack/filebeat/input/httpjson/input_test.go index 458b4becee6..dac538b54dd 100644 --- a/x-pack/filebeat/input/httpjson/input_test.go +++ b/x-pack/filebeat/input/httpjson/input_test.go @@ -27,12 +27,13 @@ import ( ) var testCases = []struct { - name string - setupServer func(testing.TB, http.HandlerFunc, map[string]interface{}) - baseConfig map[string]interface{} - handler http.HandlerFunc - expected []string - expectedFile string + name string + setupServer func(testing.TB, http.HandlerFunc, map[string]interface{}) + baseConfig map[string]interface{} + handler http.HandlerFunc + expected []string + expectedFile string + expectedNoFile string skipReason string }{ @@ -347,6 +348,90 @@ var testCases = []struct { }, expectedFile: filepath.Join("logs", "http-request-trace-httpjson-foo-eb837d4c-5ced-45ed-b05c-de658135e248_https_somesource_someapi.ndjson"), }, + { + name: "tracer_filename_sanitization_enabled", + setupServer: func(t testing.TB, h http.HandlerFunc, config map[string]interface{}) { + // mock timeNow func to return a fixed value + timeNow = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z") + return t + } + + server := httptest.NewServer(h) + config["request.url"] = server.URL + t.Cleanup(server.Close) + t.Cleanup(func() { timeNow = time.Now }) + }, + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": http.MethodGet, + "request.transforms": []interface{}{ + map[string]interface{}{ + "set": map[string]interface{}{ + "target": "url.params.$filter", + "value": "alertCreationTime ge [[.cursor.timestamp]]", + "default": `alertCreationTime ge [[formatDate (now (parseDuration "-10m")) "2006-01-02T15:04:05Z"]]`, + }, + }, + }, + "cursor": map[string]interface{}{ + "timestamp": map[string]interface{}{ + "value": `[[index .last_response.body "@timestamp"]]`, + }, + }, + "request.tracer.enabled": true, + "request.tracer.filename": "logs/http-request-trace-*.ndjson", + }, + handler: dateCursorHandler(), + expected: []string{ + `{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`, + }, + expectedFile: filepath.Join("logs", "http-request-trace-httpjson-foo-eb837d4c-5ced-45ed-b05c-de658135e248_https_somesource_someapi.ndjson"), + }, + { + name: "tracer_filename_sanitization_disabled", + setupServer: func(t testing.TB, h http.HandlerFunc, config map[string]interface{}) { + // mock timeNow func to return a fixed value + timeNow = func() time.Time { + t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z") + return t + } + + server := httptest.NewServer(h) + config["request.url"] = server.URL + t.Cleanup(server.Close) + t.Cleanup(func() { timeNow = time.Now }) + }, + baseConfig: map[string]interface{}{ + "interval": 1, + "request.method": http.MethodGet, + "request.transforms": []interface{}{ + map[string]interface{}{ + "set": map[string]interface{}{ + "target": "url.params.$filter", + "value": "alertCreationTime ge [[.cursor.timestamp]]", + "default": `alertCreationTime ge [[formatDate (now (parseDuration "-10m")) "2006-01-02T15:04:05Z"]]`, + }, + }, + }, + "cursor": map[string]interface{}{ + "timestamp": map[string]interface{}{ + "value": `[[index .last_response.body "@timestamp"]]`, + }, + }, + "request.tracer.enabled": false, + "request.tracer.filename": "logs/http-request-trace-*.ndjson", + }, + handler: dateCursorHandler(), + expected: []string{ + `{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`, + `{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`, + }, + expectedNoFile: filepath.Join("logs", "http-request-trace-httpjson-foo-eb837d4c-5ced-45ed-b05c-de658135e248_https_somesource_someapi*"), + }, { name: "pagination", setupServer: func(t testing.TB, h http.HandlerFunc, config map[string]interface{}) { @@ -1424,6 +1509,15 @@ func TestInput(t *testing.T) { t.Errorf("Expected log filename not found") } } + if test.expectedNoFile != "" { + paths, err := filepath.Glob(filepath.Join(tempDir, test.expectedNoFile)) + if err != nil { + t.Fatalf("unexpected error calling filepath.Glob(%q): %v", test.expectedNoFile, err) + } + if len(paths) != 0 { + t.Errorf("unexpected files found: %v", paths) + } + } assert.NoError(t, g.Wait()) }) }