Skip to content

Commit

Permalink
feat(tracing): Misc. OTel tracing improvements (#2485)
Browse files Browse the repository at this point in the history
* feat(tracing): Misc. OTel tracing improvements

Signed-off-by: Dave Henderson <[email protected]>

* chore: Use koanf instead of the direct usage of env variables

---------

Signed-off-by: Dave Henderson <[email protected]>
Co-authored-by: Thomas Poignant <[email protected]>
  • Loading branch information
hairyhenderson and thomaspoignant authored Oct 23, 2024
1 parent ad84c27 commit f085397
Show file tree
Hide file tree
Showing 10 changed files with 494 additions and 49 deletions.
164 changes: 139 additions & 25 deletions cmd/relayproxy/api/opentelemetry/otel.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,68 +2,182 @@ package opentelemetry

import (
"context"
"net/url"
"fmt"
"os"
"time"

"github.com/thomaspoignant/go-feature-flag/cmd/relayproxy/config"
"go.opentelemetry.io/contrib/exporters/autoexport"
"go.opentelemetry.io/contrib/samplers/jaegerremote"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
"go.opentelemetry.io/otel/trace/noop"
"go.uber.org/zap"
)

type OtelService struct {
otelTraceProvider *sdktrace.TracerProvider
otelExporter *otlptrace.Exporter
otelExporter sdktrace.SpanExporter
}

func NewOtelService() OtelService {
return OtelService{}
}

// Init the OpenTelemetry service
func (s *OtelService) Init(ctx context.Context, config config.Config) error {
// parsing the OpenTelemetry endpoint
u, err := url.Parse(config.OpenTelemetryOtlpEndpoint)
func (s *OtelService) Init(ctx context.Context, zapLog *zap.Logger, config config.Config) error {
// OTEL_SDK_DISABLED is not supported by the Go SDK, but is a standard env
// var defined by the OTel spec. We'll use it to disable the trace provider.
if config.OtelConfig.SDK.Disabled {
otel.SetTracerProvider(noop.NewTracerProvider())
return nil
}

// support the openTelemetryOtlpEndpoint config element
if config.OpenTelemetryOtlpEndpoint != "" &&
config.OtelConfig.Exporter.Otlp.Endpoint == "" {
config.OtelConfig.Exporter.Otlp.Endpoint = config.OpenTelemetryOtlpEndpoint
_ = os.Setenv("OTEL_EXPORTER_OTLP_ENDPOINT", config.OpenTelemetryOtlpEndpoint)
}

exporter, err := autoexport.NewSpanExporter(ctx)
if err != nil {
return err
return fmt.Errorf("initializing OTel exporter: %w", err)
}

serviceName := "go-feature-flag"
if v := config.OtelConfig.Service.Name; v != "" {
serviceName = v
}

var opts []otlptracehttp.Option
if u.Scheme == "http" {
opts = append(opts, otlptracehttp.WithInsecure())
sampler, err := initSampler(serviceName, config)
if err != nil {
return fmt.Errorf("initializing OTel sampler: %w", err)
}
opts = append(opts, otlptracehttp.WithEndpoint(u.Host))
client := otlptracehttp.NewClient(opts...)

s.otelExporter, err = otlptrace.New(ctx, client)
resource, err := initResource(ctx, serviceName, config.Version)
if err != nil {
return err
return fmt.Errorf("initializing OTel resources: %w", err)
}

s.otelExporter = exporter
s.otelTraceProvider = sdktrace.NewTracerProvider(
sdktrace.WithSampler(sdktrace.AlwaysSample()),
sdktrace.WithBatcher(s.otelExporter),
sdktrace.WithResource(resource.NewSchemaless(
attribute.String("service.name", "go-feature-flag"),
attribute.String("service.version", config.Version),
)),
sdktrace.WithBatcher(exporter),
sdktrace.WithSampler(sampler),
sdktrace.WithResource(resource),
)

otel.SetTracerProvider(s.otelTraceProvider)

// log OTel errors to zap rather than the default log package
otel.SetErrorHandler(otelErrHandler(func(err error) {
zapLog.Error("OTel error", zap.Error(err))
}))

return nil
}

type otelErrHandler func(err error)

func (o otelErrHandler) Handle(err error) {
o(err)
}

var _ otel.ErrorHandler = otelErrHandler(nil)

func initResource(ctx context.Context, serviceName string, version string) (*resource.Resource, error) {
return resource.New(ctx,
resource.WithFromEnv(),
resource.WithProcessPID(),
resource.WithProcessExecutableName(),
resource.WithProcessExecutablePath(),
resource.WithProcessOwner(),
resource.WithProcessRuntimeName(),
resource.WithProcessRuntimeVersion(),
resource.WithProcessRuntimeDescription(),
resource.WithHost(),
resource.WithTelemetrySDK(),
resource.WithOS(),
resource.WithContainer(),
resource.WithAttributes(
semconv.ServiceNameKey.String(serviceName),
semconv.ServiceVersionKey.String(version),
),
)
}

// initSampler determines which sampling strategy to use. If OTEL_TRACES_SAMPLER
// is unset, we'll always sample.
// If it's set to jaeger_remote, we'll use the Jaeger sampling server (supports
// JAEGER_SAMPLER_MANAGER_HOST_PORT, JAEGER_SAMPLER_REFRESH_INTERVAL, and
// JAEGER_SAMPLER_MAX_OPERATIONS).
// If it's set to any other value, we return nil and sdktrace.NewTracerProvider
// will set up the initSampler from the environment.
func initSampler(serviceName string, conf config.Config) (sdktrace.Sampler, error) {
sampler := conf.OtelConfig.Traces.Sampler
if sampler == "" {
return sdktrace.AlwaysSample(), nil
}

if sampler != "jaeger_remote" {
return nil, nil
}

samplerURL, samplerRefreshInterval, maxOperations, err := jaegerRemoteSamplerOpts(conf)
if err != nil {
return nil, err
}

return jaegerremote.New(
serviceName,
jaegerremote.WithSamplingServerURL(samplerURL),
jaegerremote.WithSamplingRefreshInterval(samplerRefreshInterval),
jaegerremote.WithMaxOperations(maxOperations),
jaegerremote.WithInitialSampler(sdktrace.AlwaysSample()),
), nil
}

const (
defaultSamplerURL = "http://localhost:5778/sampling"
defaultSamplingRefreshInterval = 1 * time.Minute
defaultSamplingMaxOperations = 256
)

func jaegerRemoteSamplerOpts(conf config.Config) (string, time.Duration, int, error) {
samplerURL := defaultSamplerURL
if host := conf.JaegerConfig.Sampler.Manager.Host.Port; host != "" {
samplerURL = host
}

samplerRefreshInterval := defaultSamplingRefreshInterval
if v := conf.JaegerConfig.Sampler.Refresh.Interval; v != "" {
d, err := time.ParseDuration(v)
if err != nil {
return "", 0, 0, fmt.Errorf("parsing JAEGER_SAMPLER_REFRESH_INTERVAL: %w", err)
}
samplerRefreshInterval = d
}

maxOperations := defaultSamplingMaxOperations
if v := conf.JaegerConfig.Sampler.Max.Operations; v != 0 {
maxOperations = v
}
return samplerURL, samplerRefreshInterval, maxOperations, nil
}

// Stop the OpenTelemetry service
func (s *OtelService) Stop() error {
func (s *OtelService) Stop(ctx context.Context) error {
if s.otelExporter != nil {
err := s.otelExporter.Shutdown(context.Background())
err := s.otelExporter.Shutdown(ctx)
if err != nil {
return err
}
}

if s.otelTraceProvider != nil {
err := s.otelTraceProvider.Shutdown(context.Background())
err := s.otelTraceProvider.Shutdown(ctx)
if err != nil {
return err
}
Expand Down
Loading

0 comments on commit f085397

Please sign in to comment.