From 1ac2243081418bb88ed5c6c859e67282a78dc6df Mon Sep 17 00:00:00 2001 From: Dave Henderson Date: Sat, 26 Oct 2024 10:17:37 -0400 Subject: [PATCH] fix(otel): Don't require entrypoint config to initialize OTel Signed-off-by: Dave Henderson --- cmd/relayproxy/api/opentelemetry/otel.go | 21 +++++- cmd/relayproxy/api/opentelemetry/otel_test.go | 72 +++++++++++++++---- cmd/relayproxy/api/server.go | 12 ++-- cmd/relayproxy/config/config.go | 3 + .../docs/relay_proxy/monitor_relay_proxy.md | 3 +- 5 files changed, 88 insertions(+), 23 deletions(-) diff --git a/cmd/relayproxy/api/opentelemetry/otel.go b/cmd/relayproxy/api/opentelemetry/otel.go index 4afd25f69f6..4ed3671abbb 100644 --- a/cmd/relayproxy/api/opentelemetry/otel.go +++ b/cmd/relayproxy/api/opentelemetry/otel.go @@ -15,6 +15,7 @@ import ( semconv "go.opentelemetry.io/otel/semconv/v1.26.0" "go.opentelemetry.io/otel/trace/noop" "go.uber.org/zap" + "go.uber.org/zap/zapcore" ) type OtelService struct { @@ -71,9 +72,25 @@ func (s *OtelService) Init(ctx context.Context, zapLog *zap.Logger, config confi otel.SetTracerProvider(s.otelTraceProvider) - // log OTel errors to zap rather than the default log package + // log OTel errors to zap rather than the default log package, respecting + // the OTEL_LOG_LEVEL variable otel.SetErrorHandler(otelErrHandler(func(err error) { - zapLog.Error("OTel error", zap.Error(err)) + var log func(msg string, fields ...zapcore.Field) + switch config.OtelConfig.Log.Level { + case "info": + log = zapLog.Info + case "warn": + log = zapLog.Warn + case "error": + log = zapLog.Error + default: + // default for OTEL_LOG_LEVEL is "info" according to spec, but + // we'll use debug level to avoid spamming logs when the user hasn't + // configured tracing explicitly + log = zapLog.Debug + } + + log("OTel error", zap.Error(err)) })) return nil diff --git a/cmd/relayproxy/api/opentelemetry/otel_test.go b/cmd/relayproxy/api/opentelemetry/otel_test.go index da1b99bfcfe..3aeb4fcdf25 100644 --- a/cmd/relayproxy/api/opentelemetry/otel_test.go +++ b/cmd/relayproxy/api/opentelemetry/otel_test.go @@ -208,23 +208,69 @@ func TestInit(t *testing.T) { }) t.Run("error handler logs to zap", func(t *testing.T) { - obs, logs := observer.New(zap.InfoLevel) - testLogger := zap.New(obs) + t.Run("doesn't log by default", func(t *testing.T) { + // logs shouldn't be v + obs, logs := observer.New(zap.InfoLevel) + testLogger := zap.New(obs) - expectedErr := errors.New("test error") + expectedErr := errors.New("test error") - err := svc.Init(context.Background(), testLogger, config.Config{}) - require.NoError(t, err) - defer func() { _ = svc.Stop(context.Background()) }() - otel.GetErrorHandler().Handle(expectedErr) + err := svc.Init(context.Background(), testLogger, config.Config{}) + require.NoError(t, err) + defer func() { _ = svc.Stop(context.Background()) }() + otel.GetErrorHandler().Handle(expectedErr) + + require.Len(t, logs.All(), 0) + }) + + t.Run("visible at debug level", func(t *testing.T) { + obs, logs := observer.New(zap.DebugLevel) + testLogger := zap.New(obs) + + expectedErr := errors.New("test error") + + err := svc.Init(context.Background(), testLogger, config.Config{}) + require.NoError(t, err) + defer func() { _ = svc.Stop(context.Background()) }() + otel.GetErrorHandler().Handle(expectedErr) + + require.Len(t, logs.All(), 1) - require.Len(t, logs.All(), 1) + want := []observer.LoggedEntry{{ + Entry: zapcore.Entry{Level: zap.DebugLevel, Message: "OTel error"}, + Context: []zapcore.Field{zap.Error(expectedErr)}, + }} - want := []observer.LoggedEntry{{ - Entry: zapcore.Entry{Level: zap.ErrorLevel, Message: "OTel error"}, - Context: []zapcore.Field{zap.Error(expectedErr)}, - }} + assert.Equal(t, want, logs.AllUntimed()) + }) + + t.Run("overridden at configured level", func(t *testing.T) { + expectedErr := errors.New("test error") + + levels := []zapcore.Level{zapcore.InfoLevel, zapcore.WarnLevel, zapcore.ErrorLevel} + for _, l := range levels { + t.Run(l.String(), func(t *testing.T) { + obs, logs := observer.New(zap.InfoLevel) + testLogger := zap.New(obs) + + cfg := config.Config{} + cfg.OtelConfig.Log.Level = l.String() - assert.Equal(t, want, logs.AllUntimed()) + err := svc.Init(context.Background(), testLogger, cfg) + require.NoError(t, err) + defer func() { _ = svc.Stop(context.Background()) }() + otel.GetErrorHandler().Handle(expectedErr) + + require.Len(t, logs.All(), 1) + + want := []observer.LoggedEntry{{ + Entry: zapcore.Entry{Level: l, Message: "OTel error"}, + Context: []zapcore.Field{zap.Error(expectedErr)}, + }} + + assert.Equal(t, want, logs.AllUntimed()) + }) + } + }) }) } diff --git a/cmd/relayproxy/api/server.go b/cmd/relayproxy/api/server.go index 8054561cdd8..65185470492 100644 --- a/cmd/relayproxy/api/server.go +++ b/cmd/relayproxy/api/server.go @@ -120,12 +120,10 @@ func (s *Server) Start() { } // start the OpenTelemetry tracing service - if s.config.OpenTelemetryOtlpEndpoint != "" { - err := s.otelService.Init(context.Background(), s.zapLog, *s.config) - if err != nil { - s.zapLog.Error("error while initializing Otel", zap.Error(err)) - // we can continue because otel is not mandatory to start the server - } + err := s.otelService.Init(context.Background(), s.zapLog, *s.config) + if err != nil { + s.zapLog.Error("error while initializing OTel, continuing without tracing enabled", zap.Error(err)) + // we can continue because otel is not mandatory to start the server } // starting the main application @@ -138,7 +136,7 @@ func (s *Server) Start() { zap.String("address", address), zap.String("version", s.config.Version)) - err := s.apiEcho.Start(address) + err = s.apiEcho.Start(address) if err != nil && !errors.Is(err, http.ErrServerClosed) { s.zapLog.Fatal("Error starting relay proxy", zap.Error(err)) } diff --git a/cmd/relayproxy/config/config.go b/cmd/relayproxy/config/config.go index 404e77416fe..d61a29f677d 100644 --- a/cmd/relayproxy/config/config.go +++ b/cmd/relayproxy/config/config.go @@ -285,6 +285,9 @@ type OpenTelemetryConfiguration struct { Protocol string `mapstructure:"protocol" koanf:"protocol"` } `mapstructure:"otlp" koanf:"otlp"` } `mapstructure:"exporter" koanf:"exporter"` + Log struct { + Level string `mapstructure:"level" koanf:"level"` + } `mapstructure:"log" koanf:"log"` Service struct { Name string `mapstructure:"name" koanf:"name"` } `mapstructure:"service" koanf:"service"` diff --git a/website/docs/relay_proxy/monitor_relay_proxy.md b/website/docs/relay_proxy/monitor_relay_proxy.md index 740c3e1df0f..cc695b848cc 100644 --- a/website/docs/relay_proxy/monitor_relay_proxy.md +++ b/website/docs/relay_proxy/monitor_relay_proxy.md @@ -11,8 +11,9 @@ The **relay proxy** is able to trace the requests it is handling. This is done b ### Configuration By default, the relay proxy will attempt to send traces to an OpenTelemetry -collector or compatible agent running at `http://localhost:4318` using the +collector or compatible agent running at `https://localhost:4318` using the `http/protobuf` protocol. +To see logs (including error messages) from the OTel subsystem, enable debug logging or set the `OTEL_LOG_LEVEL` variable (defaults to `debug`). To override the endpoint, set the `OTEL_EXPORTER_OTLP_ENDPOINT` environment variable. To override the protocol, set the `OTEL_EXPORTER_OTLP_PROTOCOL` environment variable. See [the OpenTelemetry documentation](https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/) for more information.