From a43c878978ef6ec598dc693e2a2cb34b93ad4233 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 4 Oct 2024 11:02:28 -0700 Subject: [PATCH] (otelarrowexporter) Adjust configuration based on experimental results (#35478) **Description:** As reported in https://opentelemetry.io/blog/2024/otel-arrow-production/, we have run a number of experiments with the OTel-Arrow components. Our preferred configuration after these experiments is as-seen in this PR. **Link to tracking Issue:** https://github.com/open-telemetry/opentelemetry-collector-contrib/issues/26491 **Testing:** As in https://opentelemetry.io/blog/2024/otel-arrow-production. **Documentation:** Included. --- .chloggen/otelarrow-defaults.yaml | 27 ++++++++++++++ exporter/otelarrowexporter/README.md | 37 ++++++++++++------- exporter/otelarrowexporter/factory.go | 11 ++---- exporter/otelarrowexporter/factory_test.go | 6 +-- .../internal/arrow/exporter.go | 29 +++++++++++++++ 5 files changed, 86 insertions(+), 24 deletions(-) create mode 100644 .chloggen/otelarrow-defaults.yaml diff --git a/.chloggen/otelarrow-defaults.yaml b/.chloggen/otelarrow-defaults.yaml new file mode 100644 index 000000000000..db8d5bf56785 --- /dev/null +++ b/.chloggen/otelarrow-defaults.yaml @@ -0,0 +1,27 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: otelarrowexporter + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Adjust defaults from https://opentelemetry.io/blog/2024/otel-arrow-production/ experiments. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. 
+issues: [35477] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/exporter/otelarrowexporter/README.md b/exporter/otelarrowexporter/README.md index 3538bbc4fe73..a6aeaad2fec7 100644 --- a/exporter/otelarrowexporter/README.md +++ b/exporter/otelarrowexporter/README.md @@ -101,17 +101,26 @@ to standard OTLP. - `disabled` (default: false): disables use of Arrow, causing the exporter to use standard OTLP - `disable_downgrade` (default: false): prevents this exporter from using standard OTLP. -The following settings determine the resources that the exporter will use: +The following setting determines how long a stream will stay open. +Stream lifetime is limited to 30 seconds because compression benefit +is limited at that point and shorter streams make load balancing +easier. -- `num_streams` (default: number of CPUs): the number of concurrent Arrow streams -- `max_stream_lifetime` (default: unlimited): duration after which streams are recycled. +- `max_stream_lifetime` (default: 30s): duration after which streams + are recycled. -When `num_streams` is greater than one, a configurable policy -determines how load is assigned across streams. 
The supported -policies are `leastloaded`, which picks the stream with the smallest -number of outstanding requests, and `leastloadedN` for `N <= -num_streams`, which limits the decision to a random subset of `N` -streams. +The following setting determines memory and CPU resources that the +exporter will use: + +- `num_streams` (default: `max(1, NumCPU()/2)`): the number of concurrent Arrow streams + +The `num_streams` default limits the exporter stream count to half the +number of CPUs or 1, whichever is greater. When `num_streams` is +greater than one, a configurable policy determines how load is +assigned across streams. The supported policies are +`leastloaded`, which picks the stream with the smallest number of +outstanding requests, and `leastloadedN` for `N <= num_streams`, which +limits the decision to a random subset of `N` streams. - `prioritizer` (default: "leastloaded"): policy for distributing load across multiple streams. @@ -229,12 +238,12 @@ The exporter supports configuring compression at the [Arrow columnar-protocol level](https://arrow.apache.org/docs/format/Columnar.html#format-ipc). -- `payload_compression`: compression applied at the Arrow IPC level, "none" by default, "zstd" supported. +- `payload_compression` (default: "zstd"): compression applied at the Arrow IPC level. -Compression settings at the Arrow IPC level cannot be further -configured. We do not recommend configuring both payload and -gRPC-level compression at once, hwoever these settings are -independent. +Compression at the Arrow level is enabled by default because it boosts +compression slightly and helps Arrow payloads meet gRPC maximum +request size limits. Compression settings at the Arrow IPC level +cannot be further configured. 
For example, two exporters may be configured with multiple zstd configurations, provided they use different levels: diff --git a/exporter/otelarrowexporter/factory.go b/exporter/otelarrowexporter/factory.go index a868fa286d65..98740119145f 100644 --- a/exporter/otelarrowexporter/factory.go +++ b/exporter/otelarrowexporter/factory.go @@ -5,8 +5,6 @@ package otelarrowexporter // import "github.com/open-telemetry/opentelemetry-col import ( "context" - "runtime" - "time" arrowpb "github.com/open-telemetry/otel-arrow/api/experimental/arrow/v1" "go.opentelemetry.io/collector/component" @@ -59,15 +57,14 @@ func createDefaultConfig() component.Config { BalancerName: "round_robin", }, Arrow: ArrowConfig{ - NumStreams: runtime.NumCPU(), - MaxStreamLifetime: time.Hour, + NumStreams: arrow.DefaultNumStreams, + MaxStreamLifetime: arrow.DefaultMaxStreamLifetime, Zstd: zstd.DefaultEncoderConfig(), Prioritizer: arrow.DefaultPrioritizer, - // PayloadCompression is off by default because gRPC - // compression is on by default, above. 
- PayloadCompression: "", + // Note the default payload compression is "zstd"; see arrow.DefaultPayloadCompression. + PayloadCompression: arrow.DefaultPayloadCompression, }, } } diff --git a/exporter/otelarrowexporter/factory_test.go b/exporter/otelarrowexporter/factory_test.go index 7498a0403dd8..917aab09b64d 100644 --- a/exporter/otelarrowexporter/factory_test.go +++ b/exporter/otelarrowexporter/factory_test.go @@ -38,9 +38,9 @@ func TestCreateDefaultConfig(t *testing.T) { assert.Equal(t, configcompression.TypeZstd, ocfg.Compression) assert.Equal(t, ArrowConfig{ Disabled: false, - NumStreams: runtime.NumCPU(), - MaxStreamLifetime: time.Hour, - PayloadCompression: "", + NumStreams: max(1, runtime.NumCPU()/2), + MaxStreamLifetime: 30 * time.Second, + PayloadCompression: "zstd", Zstd: zstd.DefaultEncoderConfig(), Prioritizer: arrow.DefaultPrioritizer, }, ocfg.Arrow) diff --git a/exporter/otelarrowexporter/internal/arrow/exporter.go b/exporter/otelarrowexporter/internal/arrow/exporter.go index e42205af197a..f8b858167f03 100644 --- a/exporter/otelarrowexporter/internal/arrow/exporter.go +++ b/exporter/otelarrowexporter/internal/arrow/exporter.go @@ -7,6 +7,7 @@ import ( "context" "errors" "math/rand" + "runtime" "strconv" "sync" "time" @@ -14,6 +15,7 @@ import ( arrowpb "github.com/open-telemetry/otel-arrow/api/experimental/arrow/v1" arrowRecord "github.com/open-telemetry/otel-arrow/pkg/otel/arrow_record" "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/config/configcompression" "go.opentelemetry.io/collector/pdata/plog" "go.opentelemetry.io/collector/pdata/pmetric" "go.opentelemetry.io/collector/pdata/ptrace" @@ -27,6 +29,33 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/internal/otelarrow/netstats" ) +// Default settings should use relatively few resources, so that +// users are required to explicitly configure large instances. +var ( + // DefaultNumStreams is half the number of CPUs, but at least 1. 
This is + // selected as a rough estimate of how much work is + // being performed by the exporter compared with other + // components in the system. + DefaultNumStreams = max(1, runtime.NumCPU()/2) +) + +const ( + // DefaultMaxStreamLifetime is 30 seconds, because the + // marginal compression benefit of a longer OTel-Arrow stream + // is limited after hundreds of batches. + DefaultMaxStreamLifetime = 30 * time.Second + + // DefaultPayloadCompression is "zstd" so that Arrow IPC + // payloads use Arrow-configured Zstd over the payload + // independently of whatever compression gRPC may have + // configured. This is on by default, achieving "double + // compression" because: + // (a) relatively cheap in CPU terms + // (b) minor compression benefit + // (c) helps stay under gRPC request size limits + DefaultPayloadCompression configcompression.Type = "zstd" +) + // Exporter is 1:1 with exporter, isolates arrow-specific // functionality. type Exporter struct {