From 865623a7285007a06f82f153c7f3a22e7d025853 Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Mon, 27 Nov 2023 11:06:57 -0800 Subject: [PATCH 1/3] Add additional transaction labels with error details to requests. --- ...1111919-Enrich-transaction-APM-errors.yaml | 32 +++++++++++++++++++ internal/pkg/api/error.go | 16 +++++++++- 2 files changed, 47 insertions(+), 1 deletion(-) create mode 100644 changelog/fragments/1701111919-Enrich-transaction-APM-errors.yaml diff --git a/changelog/fragments/1701111919-Enrich-transaction-APM-errors.yaml b/changelog/fragments/1701111919-Enrich-transaction-APM-errors.yaml new file mode 100644 index 000000000..0b78d8b1b --- /dev/null +++ b/changelog/fragments/1701111919-Enrich-transaction-APM-errors.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; a 80ish characters long description of the change. +summary: Enrich transaction APM errors + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; a word indicating the component this changeset affects. +component: + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: 3098 diff --git a/internal/pkg/api/error.go b/internal/pkg/api/error.go index dff6002ef..4d31f3ea8 100644 --- a/internal/pkg/api/error.go +++ b/internal/pkg/api/error.go @@ -8,6 +8,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "net/http" "os" "strings" @@ -15,6 +16,7 @@ import ( "github.com/elastic/fleet-server/v7/internal/pkg/apikey" "github.com/elastic/fleet-server/v7/internal/pkg/dl" + "github.com/elastic/fleet-server/v7/internal/pkg/es" "github.com/elastic/fleet-server/v7/internal/pkg/file" "github.com/elastic/fleet-server/v7/internal/pkg/file/delivery" "github.com/elastic/fleet-server/v7/internal/pkg/file/uploader" @@ -518,13 +520,25 @@ func (er HTTPErrResp) Write(w http.ResponseWriter) error { func ErrorResp(w http.ResponseWriter, r *http.Request, err error) { zlog := hlog.FromRequest(r) resp := NewHTTPErrResp(err) - e := zlog.WithLevel(resp.Level).Err(err).Int(ECSHTTPResponseCode, resp.StatusCode) + e := zlog.WithLevel(resp.Level).Err(err).Int(ECSHTTPResponseCode, resp.StatusCode).Str("error.type", fmt.Sprintf("%T", err)) if ts, ok := logger.CtxStartTime(r.Context()); ok { e = e.Int64(ECSEventDuration, time.Since(ts).Nanoseconds()) } e.Msg("HTTP request error") if resp.StatusCode >= 500 { + trans := apm.TransactionFromContext(r.Context()) + switch typ := err.(type) { + case *es.ErrElastic: + trans.Context.SetLabel("error.type", "ErrElastic") + trans.Context.SetLabel("error.details.status", typ.Status) + trans.Context.SetLabel("error.details.type", typ.Type) + trans.Context.SetLabel("error.details.reason", typ.Reason) + trans.Context.SetLabel("error.details.cause.type", typ.Cause.Type) + trans.Context.SetLabel("error.details.cause.reason", typ.Cause.Reason) + default: + trans.Context.SetLabel("error.type", fmt.Sprintf("%T", typ)) + } apm.CaptureError(r.Context(), err).Send() } From 012ce5023c5ab9aeb697e8c341fa1e91901a449b Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Mon, 27 Nov 2023 13:22:03 -0800 Subject: [PATCH 2/3] Fix test issue --- internal/pkg/api/error.go | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/internal/pkg/api/error.go b/internal/pkg/api/error.go index 4d31f3ea8..32d22bc4f 100644 --- a/internal/pkg/api/error.go +++ b/internal/pkg/api/error.go @@ -527,17 +527,18 @@ func ErrorResp(w http.ResponseWriter, r *http.Request, err error) { e.Msg("HTTP request error") if resp.StatusCode >= 500 { - trans := apm.TransactionFromContext(r.Context()) - switch typ := err.(type) { - case *es.ErrElastic: - trans.Context.SetLabel("error.type", "ErrElastic") - trans.Context.SetLabel("error.details.status", typ.Status) - trans.Context.SetLabel("error.details.type", typ.Type) - trans.Context.SetLabel("error.details.reason", typ.Reason) - trans.Context.SetLabel("error.details.cause.type", typ.Cause.Type) - trans.Context.SetLabel("error.details.cause.reason", typ.Cause.Reason) - default: - trans.Context.SetLabel("error.type", fmt.Sprintf("%T", typ)) + if trans := apm.TransactionFromContext(r.Context()); trans != nil { + switch typ := err.(type) { + case *es.ErrElastic: + trans.Context.SetLabel("error.type", "ErrElastic") + trans.Context.SetLabel("error.details.status", typ.Status) + trans.Context.SetLabel("error.details.type", typ.Type) + trans.Context.SetLabel("error.details.reason", typ.Reason) + trans.Context.SetLabel("error.details.cause.type", typ.Cause.Type) + trans.Context.SetLabel("error.details.cause.reason", typ.Cause.Reason) + default: + trans.Context.SetLabel("error.type", fmt.Sprintf("%T", typ)) + } } apm.CaptureError(r.Context(), err).Send() } From a3c703183cbc5b4fdfdf4f2dfc094c867bf1e8ce Mon Sep 17 00:00:00 2001 From: michel-laterman Date: Mon, 27 Nov 2023 16:31:22 -0800 Subject: [PATCH 3/3] Handle wrapped errors, add unit tests --- internal/pkg/api/error.go | 18 +++---- internal/pkg/api/error_test.go | 94 ++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 9 deletions(-) create mode 100644 internal/pkg/api/error_test.go diff --git a/internal/pkg/api/error.go b/internal/pkg/api/error.go index 32d22bc4f..ddba237b4 100644 --- a/internal/pkg/api/error.go +++ b/internal/pkg/api/error.go @@ -528,16 +528,16 @@ func ErrorResp(w http.ResponseWriter, r *http.Request, err error) { if resp.StatusCode >= 500 { if trans := apm.TransactionFromContext(r.Context()); trans != nil { - switch typ := err.(type) { - case *es.ErrElastic: + esErr := &es.ErrElastic{} + if errors.As(err, &esErr) { trans.Context.SetLabel("error.type", "ErrElastic") - trans.Context.SetLabel("error.details.status", typ.Status) - trans.Context.SetLabel("error.details.type", typ.Type) - trans.Context.SetLabel("error.details.reason", typ.Reason) - trans.Context.SetLabel("error.details.cause.type", typ.Cause.Type) - trans.Context.SetLabel("error.details.cause.reason", typ.Cause.Reason) - default: - trans.Context.SetLabel("error.type", fmt.Sprintf("%T", typ)) + trans.Context.SetLabel("error.details.status", esErr.Status) + trans.Context.SetLabel("error.details.type", esErr.Type) + trans.Context.SetLabel("error.details.reason", esErr.Reason) + trans.Context.SetLabel("error.details.cause.type", esErr.Cause.Type) + trans.Context.SetLabel("error.details.cause.reason", esErr.Cause.Reason) + } else { + trans.Context.SetLabel("error.type", fmt.Sprintf("%T", err)) } } apm.CaptureError(r.Context(), err).Send() diff --git a/internal/pkg/api/error_test.go b/internal/pkg/api/error_test.go new file mode 100644 index 000000000..3be9feed6 --- /dev/null +++ b/internal/pkg/api/error_test.go @@ -0,0 +1,94 @@ +// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +// or more contributor license agreements. Licensed under the Elastic License; +// you may not use this file except in compliance with the Elastic License. + +package api + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "github.com/elastic/fleet-server/v7/internal/pkg/es" + testlog "github.com/elastic/fleet-server/v7/internal/pkg/testing/log" + "github.com/stretchr/testify/require" + "go.elastic.co/apm/v2" + "go.elastic.co/apm/v2/apmtest" +) + +func Test_ErrorResp(t *testing.T) { + tests := []struct { + name string + err error + expectedTags map[string]interface{} + }{{ + name: "generic error", + err: fmt.Errorf("generic error"), + }, { + name: "elastic error", + err: &es.ErrElastic{}, + expectedTags: map[string]interface{}{ + "error_type": "ErrElastic", + }, + }, { + name: "wrapped elastic error", + err: fmt.Errorf("wrapped error: %w", &es.ErrElastic{}), + expectedTags: map[string]interface{}{ + "error_type": "ErrElastic", + }, + }} + + tracer := apmtest.NewRecordingTracer() + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + logger := testlog.SetLogger(t) + tracer.ResetPayloads() + + tx := tracer.StartTransaction("test", "test") + ctx := apm.ContextWithTransaction(context.Background(), tx) + ctx = logger.WithContext(ctx) + + wr := httptest.NewRecorder() + req, err := http.NewRequestWithContext(ctx, "GET", "http://localhost", nil) + require.NoError(t, err) + + ErrorResp(wr, req, tc.err) + tx.End() + ch := make(chan struct{}, 1) + tracer.Flush(ch) + + payloads := tracer.Payloads() + require.Len(t, payloads.Transactions, 1) + require.Len(t, payloads.Errors, 1) + + tags := make(map[string]interface{}) + for _, tag := range payloads.Transactions[0].Context.Tags { + tags[tag.Key] = tag.Value + } + for k, v := range tc.expectedTags { + require.Contains(t, tags, k, "expected tag is missing") + require.Equal(t, v, tags[k], "expected tag value does not match") + } + }) + } +} + +func Test_ErrorResp_NoTransaction(t *testing.T) { + tracer := apmtest.NewRecordingTracer() + ctx := testlog.SetLogger(t).WithContext(context.Background()) + + wr := httptest.NewRecorder() + req, err := http.NewRequestWithContext(ctx, "GET", "http://localhost", nil) + require.NoError(t, err) + + ErrorResp(wr, req, fmt.Errorf("some error")) + ch := make(chan struct{}, 1) + tracer.Flush(ch) + + payloads := tracer.Payloads() + require.Len(t, payloads.Transactions, 0) + require.Len(t, payloads.Errors, 0) +}