Skip to content

Commit

Permalink
x-pack/filebeat/input/httpjson: add ability to remove request trace logs
Browse files Browse the repository at this point in the history
This is a replay of elastic#39969, but for the HTTPJSON input.

The previous configuration system did not allow users to remove trace
logs from agents after they are no longer needed. This is potential
security risk as it leaves potentially sensitive information on the file
system beyond its required lifetime. The mechanism for communicating to
the input whether to write logs is not currently powerful enough to
indicate that existing logs should be removed without deleting logs from
other instances. So add an enabled configuration option to allow the
target name to be sent independently of whether the files should be
written or removed.

The new option is optional, defaulting to the previous behaviour so
that it can be opted into via progressive repair in the client
integrations.
  • Loading branch information
efd6 committed Jun 25, 2024
1 parent e3a8223 commit edf7cad
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 15 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.next.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff]
- Fix handling of infinite rate values in CEL rate limit handling logic. {pull}39940[39940]
- Allow elision of set and append failure logging. {issue}34544[34544] {pull}39929[39929]
- Add ability to remove request trace logs from CEL input. {pull}39969[39969]
- Add ability to remove request trace logs from HTTPJSON input. {pull}40003[40003]

*Auditbeat*

Expand Down
17 changes: 10 additions & 7 deletions x-pack/filebeat/docs/inputs/input-httpjson.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -640,17 +640,20 @@ because when pagination does not exist at the parent level `parent_last_response
NOTE: The `first_response` object at the moment can only store flat JSON structures (i.e. no support for JSONS having array at root level, NDJSON or Gzipped JSON), hence it should only be used in scenarios where the this is the case. Splits cannot be performed on `first_response`. It needs to be explicitly enabled by setting the flag `response.save_first_response` to `true` in the httpjson config.

[float]
==== `request.tracer.filename`

It is possible to log httpjson requests and responses to a local file-system for debugging configurations.
This option is enabled by setting the `request.tracer.filename` value. Additional options are available to
tune log rotation behavior.
==== `request.tracer.enable`

To differentiate the trace files generated from different input instances, a placeholder `*` can be added to the filename and will be replaced with the input instance id.
For Example, `http-request-trace-*.ndjson`.
It is possible to log HTTP requests and responses to a local file-system for debugging configurations.
This option is enabled by setting `request.tracer.enabled` to true and setting the `request.tracer.filename` value.
Additional options are available to tune log rotation behavior. To delete existing logs, set `request.tracer.enabled`
to false without unsetting the filename option.

Enabling this option compromises security and should only be used for debugging.

==== `request.tracer.filename`

To differentiate the trace files generated from different input instances, a placeholder `*` can be added to the
filename and will be replaced with the input instance id. For Example, `http-request-trace-*.ndjson`.

[float]
==== `request.tracer.maxsize`

Expand Down
11 changes: 10 additions & 1 deletion x-pack/filebeat/input/httpjson/config_request.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,16 @@ type requestConfig struct {

Transport httpcommon.HTTPTransportSettings `config:",inline"`

Tracer *lumberjack.Logger `config:"tracer"`
Tracer *tracerConfig `config:"tracer"`
}

type tracerConfig struct {
Enabled *bool `config:"enabled"`
lumberjack.Logger `config:",inline"`
}

func (t *tracerConfig) enabled() bool {
return t != nil && (t.Enabled == nil || *t.Enabled)
}

func (c *requestConfig) Validate() error {
Expand Down
29 changes: 28 additions & 1 deletion x-pack/filebeat/input/httpjson/input.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@ package httpjson
import (
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"net"
"net/http"
"net/url"
"os"
"path/filepath"
"strings"
"time"
Expand Down Expand Up @@ -239,13 +242,18 @@ func newHTTPClient(ctx context.Context, config config, log *logp.Logger, reg *mo
return &httpClient{client: client, limiter: limiter}, nil
}

// lumberjackTimestamp is a glob expression matching the time format string used
// by lumberjack when rolling over logs, "2006-01-02T15-04-05.000".
// https://github.com/natefinch/lumberjack/blob/4cb27fcfbb0f35cb48c542c5ea80b7c1d18933d0/lumberjack.go#L39
const lumberjackTimestamp = "[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]T[0-9][0-9]-[0-9][0-9]-[0-9][0-9].[0-9][0-9][0-9]"

func newNetHTTPClient(ctx context.Context, cfg *requestConfig, log *logp.Logger, reg *monitoring.Registry) (*http.Client, error) {
netHTTPClient, err := cfg.Transport.Client(clientOptions(cfg.URL.URL, cfg.KeepAlive.settings())...)
if err != nil {
return nil, err
}

if cfg.Tracer != nil {
if cfg.Tracer.enabled() {
w := zapcore.AddSync(cfg.Tracer)
go func() {
// Close the logger when we are done.
Expand All @@ -265,6 +273,25 @@ func newNetHTTPClient(ctx context.Context, cfg *requestConfig, log *logp.Logger,
maxSize = 0
}
netHTTPClient.Transport = httplog.NewLoggingRoundTripper(netHTTPClient.Transport, traceLogger, maxSize, log)
} else if cfg.Tracer != nil {
// We have a trace log name, but we are not enabled,
// so remove all trace logs we own.
err = os.Remove(cfg.Tracer.Filename)
if err != nil && !errors.Is(err, fs.ErrNotExist) {
log.Errorw("failed to remove request trace log", "path", cfg.Tracer.Filename, "error", err)
}
ext := filepath.Ext(cfg.Tracer.Filename)
base := strings.TrimSuffix(cfg.Tracer.Filename, ext)
paths, err := filepath.Glob(base + "-" + lumberjackTimestamp + ext)
if err != nil {
log.Errorw("failed to collect request trace log path names", "error", err)
}
for _, p := range paths {
err = os.Remove(p)
if err != nil && !errors.Is(err, fs.ErrNotExist) {
log.Errorw("failed to remove request trace log", "path", p, "error", err)
}
}
}

if reg != nil {
Expand Down
106 changes: 100 additions & 6 deletions x-pack/filebeat/input/httpjson/input_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,13 @@ import (
)

var testCases = []struct {
name string
setupServer func(testing.TB, http.HandlerFunc, map[string]interface{})
baseConfig map[string]interface{}
handler http.HandlerFunc
expected []string
expectedFile string
name string
setupServer func(testing.TB, http.HandlerFunc, map[string]interface{})
baseConfig map[string]interface{}
handler http.HandlerFunc
expected []string
expectedFile string
expectedNoFile string

skipReason string
}{
Expand Down Expand Up @@ -347,6 +348,90 @@ var testCases = []struct {
},
expectedFile: filepath.Join("logs", "http-request-trace-httpjson-foo-eb837d4c-5ced-45ed-b05c-de658135e248_https_somesource_someapi.ndjson"),
},
{
name: "tracer_filename_sanitization_enabled",
setupServer: func(t testing.TB, h http.HandlerFunc, config map[string]interface{}) {
// mock timeNow func to return a fixed value
timeNow = func() time.Time {
t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z")
return t
}

server := httptest.NewServer(h)
config["request.url"] = server.URL
t.Cleanup(server.Close)
t.Cleanup(func() { timeNow = time.Now })
},
baseConfig: map[string]interface{}{
"interval": 1,
"request.method": http.MethodGet,
"request.transforms": []interface{}{
map[string]interface{}{
"set": map[string]interface{}{
"target": "url.params.$filter",
"value": "alertCreationTime ge [[.cursor.timestamp]]",
"default": `alertCreationTime ge [[formatDate (now (parseDuration "-10m")) "2006-01-02T15:04:05Z"]]`,
},
},
},
"cursor": map[string]interface{}{
"timestamp": map[string]interface{}{
"value": `[[index .last_response.body "@timestamp"]]`,
},
},
"request.tracer.enabled": true,
"request.tracer.filename": "logs/http-request-trace-*.ndjson",
},
handler: dateCursorHandler(),
expected: []string{
`{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`,
`{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`,
`{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`,
},
expectedFile: filepath.Join("logs", "http-request-trace-httpjson-foo-eb837d4c-5ced-45ed-b05c-de658135e248_https_somesource_someapi.ndjson"),
},
{
name: "tracer_filename_sanitization_disabled",
setupServer: func(t testing.TB, h http.HandlerFunc, config map[string]interface{}) {
// mock timeNow func to return a fixed value
timeNow = func() time.Time {
t, _ := time.Parse(time.RFC3339, "2002-10-02T15:00:00Z")
return t
}

server := httptest.NewServer(h)
config["request.url"] = server.URL
t.Cleanup(server.Close)
t.Cleanup(func() { timeNow = time.Now })
},
baseConfig: map[string]interface{}{
"interval": 1,
"request.method": http.MethodGet,
"request.transforms": []interface{}{
map[string]interface{}{
"set": map[string]interface{}{
"target": "url.params.$filter",
"value": "alertCreationTime ge [[.cursor.timestamp]]",
"default": `alertCreationTime ge [[formatDate (now (parseDuration "-10m")) "2006-01-02T15:04:05Z"]]`,
},
},
},
"cursor": map[string]interface{}{
"timestamp": map[string]interface{}{
"value": `[[index .last_response.body "@timestamp"]]`,
},
},
"request.tracer.enabled": false,
"request.tracer.filename": "logs/http-request-trace-*.ndjson",
},
handler: dateCursorHandler(),
expected: []string{
`{"@timestamp":"2002-10-02T15:00:00Z","foo":"bar"}`,
`{"@timestamp":"2002-10-02T15:00:01Z","foo":"bar"}`,
`{"@timestamp":"2002-10-02T15:00:02Z","foo":"bar"}`,
},
expectedNoFile: filepath.Join("logs", "http-request-trace-httpjson-foo-eb837d4c-5ced-45ed-b05c-de658135e248_https_somesource_someapi*"),
},
{
name: "pagination",
setupServer: func(t testing.TB, h http.HandlerFunc, config map[string]interface{}) {
Expand Down Expand Up @@ -1424,6 +1509,15 @@ func TestInput(t *testing.T) {
t.Errorf("Expected log filename not found")
}
}
if test.expectedNoFile != "" {
paths, err := filepath.Glob(filepath.Join(tempDir, test.expectedNoFile))
if err != nil {
t.Fatalf("unexpected error calling filepath.Glob(%q): %v", test.expectedNoFile, err)
}
if len(paths) != 0 {
t.Errorf("unexpected files found: %v", paths)
}
}
assert.NoError(t, g.Wait())
})
}
Expand Down

0 comments on commit edf7cad

Please sign in to comment.