Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merge span normalizer into grouper #384

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
13 changes: 0 additions & 13 deletions hypertrace-ingester/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -37,7 +37,6 @@ dependencies {
implementation(project(":span-normalizer:span-normalizer-constants"))
implementation(project(":span-normalizer:raw-span-constants"))
implementation(project(":semantic-convention-utils"))
implementation(project(":span-normalizer:span-normalizer"))
implementation(project(":raw-spans-grouper:raw-spans-grouper"))
implementation(project(":hypertrace-trace-enricher:hypertrace-trace-enricher"))
implementation(project(":hypertrace-view-generator:hypertrace-view-generator"))
@@ -65,12 +64,6 @@ tasks.processResources {

tasks.register<Copy>("copyServiceConfigs") {
with(
createCopySpec(
"span-normalizer",
"span-normalizer",
"main",
"common"
),
createCopySpec(
"raw-spans-grouper",
"raw-spans-grouper",
@@ -143,12 +136,6 @@ tasks.test {

tasks.register<Copy>("copyServiceConfigsTest") {
with(
createCopySpec(
"span-normalizer",
"span-normalizer",
"test",
"span-normalizer"
),
createCopySpec(
"raw-spans-grouper",
"raw-spans-grouper",
Original file line number Diff line number Diff line change
@@ -15,7 +15,6 @@
import org.hypertrace.core.serviceframework.config.ConfigClient;
import org.hypertrace.core.serviceframework.config.ConfigClientFactory;
import org.hypertrace.core.serviceframework.config.ConfigUtils;
import org.hypertrace.core.spannormalizer.SpanNormalizer;
import org.hypertrace.core.viewgenerator.service.MultiViewGeneratorLauncher;
import org.hypertrace.metrics.exporter.MetricsExporterService;
import org.hypertrace.metrics.generator.MetricsGenerator;
@@ -52,9 +51,6 @@ public HypertraceIngester(ConfigClient configClient) {
private KafkaStreamsApp getSubTopologyInstance(String name) {
KafkaStreamsApp kafkaStreamsApp;
switch (name) {
case "span-normalizer":
kafkaStreamsApp = new SpanNormalizer(ConfigClientFactory.getClient());
break;
case "raw-spans-grouper":
kafkaStreamsApp = new RawSpansGrouper(ConfigClientFactory.getClient());
break;
Original file line number Diff line number Diff line change
@@ -4,7 +4,6 @@ service.name = hypertrace-ingester
service.admin.port = 8099

sub.topology.names = [
"span-normalizer",
"raw-spans-grouper",
"hypertrace-trace-enricher",
"all-views"
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@
import com.google.protobuf.ByteString;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import io.jaegertracing.api_v2.JaegerSpanInternalModel.Span;
import io.jaegertracing.api_v2.JaegerSpanInternalModel;
import java.io.File;
import java.nio.file.Path;
import java.time.Duration;
@@ -36,7 +36,6 @@ public class HypertraceIngesterTest {
private static final String CONFIG_PATH = "configs/%s/application.conf";
private HypertraceIngester underTest;
private Config underTestConfig;
private Config spanNormalizerConfig;
private Config rawSpansGrouperConfig;
private Config traceEnricherConfig;
private Config spanEventViewGeneratorConfig;
@@ -45,7 +44,6 @@ public class HypertraceIngesterTest {
public void setUp() {
underTest = new HypertraceIngester(ConfigClientFactory.getClient());
underTestConfig = getConfig("hypertrace-ingester");
spanNormalizerConfig = getConfig("span-normalizer");
rawSpansGrouperConfig = getConfig("raw-spans-grouper");
traceEnricherConfig = getConfig("hypertrace-trace-enricher");
spanEventViewGeneratorConfig = getConfig("view-gen-span-event");
@@ -71,30 +69,26 @@ public void testIngestionPacketFlow(@TempDir Path tempDir) {
// create topology test driver for ingester
TopologyTestDriver topologyTestDriver = new TopologyTestDriver(streamsBuilder.build(), props);

Span span =
Span.newBuilder()
JaegerSpanInternalModel.Span span =
JaegerSpanInternalModel.Span.newBuilder()
.setSpanId(ByteString.copyFrom("1".getBytes()))
.setTraceId(ByteString.copyFrom("trace-1".getBytes()))
.addTags(
JaegerSpanInternalModel.KeyValue.newBuilder()
.setKey("tenant-id")
.setVStr("tenant-1")
.build())
.build();

TestInputTopic<byte[], Span> spanNormalizerInputTopic =
TestInputTopic<byte[], JaegerSpanInternalModel.Span> spanGrouperInputTopic =
topologyTestDriver.createInputTopic(
spanNormalizerConfig.getString(
rawSpansGrouperConfig.getString(
org.hypertrace.core.spannormalizer.constants.SpanNormalizerConstants
.INPUT_TOPIC_CONFIG_KEY),
Serdes.ByteArray().serializer(),
new JaegerSpanSerde().serializer());

spanNormalizerInputTopic.pipeInput(span);

// create output topic for span-normalizer topology
TestOutputTopic spanNormalizerOutputTopic =
topologyTestDriver.createOutputTopic(
spanNormalizerConfig.getString(
StructuredTraceEnricherConstants.OUTPUT_TOPIC_CONFIG_KEY),
Serdes.String().deserializer(),
new AvroSerde<>().deserializer());
assertNotNull(spanNormalizerOutputTopic.readKeyValue());
spanGrouperInputTopic.pipeInput(span);

topologyTestDriver.advanceWallClockTime(Duration.ofSeconds(32));

@@ -107,14 +101,6 @@ public void testIngestionPacketFlow(@TempDir Path tempDir) {
new AvroSerde<>().deserializer());
assertNotNull(spanGrouperOutputTopic.readKeyValue());

// create output topic for trace-enricher topology
TestOutputTopic traceEnricherOutputTopic =
topologyTestDriver.createOutputTopic(
traceEnricherConfig.getString(StructuredTraceEnricherConstants.OUTPUT_TOPIC_CONFIG_KEY),
Serdes.String().deserializer(),
new AvroSerde<>().deserializer());
assertNotNull(traceEnricherOutputTopic.readKeyValue());

// create output topic for topology
TestOutputTopic spanEventViewOutputTopic =
topologyTestDriver.createOutputTopic(
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@ main.class = org.hypertrace.ingester.HypertraceIngester
service.name = hypertrace-ingester
service.admin.port = 8099

sub.topology.names = ["span-normalizer", "raw-spans-grouper", "hypertrace-trace-enricher", "all-views"]
sub.topology.names = ["raw-spans-grouper", "hypertrace-trace-enricher", "all-views"]

precreate.topics = false

80 changes: 80 additions & 0 deletions raw-spans-grouper/helm/templates/raw-spans-grouper-config.yaml
Original file line number Diff line number Diff line change
@@ -81,3 +81,83 @@ data:
}
}
{{- end }}

# span normalizer config
{{- if hasKey .Values.spanNormalizerConfig "processor" }}
processor {
{{- if hasKey .Values.spanNormalizerConfig.processor "tenantIdTagKey" }}
tenantIdTagKey = "{{ .Values.spanNormalizerConfig.processor.tenantIdTagKey }}"
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "excludeTenantIds" }}
excludeTenantIds = {{ .Values.spanNormalizerConfig.processor.excludeTenantIds | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "defaultTenantId" }}
defaultTenantId = "{{ .Values.spanNormalizerConfig.processor.defaultTenantId }}"
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "spanDropCriterion" }}
spanDropCriterion = {{ .Values.spanNormalizerConfig.processor.spanDropCriterion | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "spanDropFilters" }}
spanDropFilters = {{ .Values.spanNormalizerConfig.processor.spanDropFilters | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "bypassKey" }}
bypass.key = "{{ .Values.spanNormalizerConfig.processor.bypassKey }}"
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "lateArrivalThresholdDuration" }}
late.arrival.threshold.duration = "{{ .Values.spanNormalizerConfig.processor.lateArrivalThresholdDuration }}"
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "allowedAttributesPrefixes" }}
allowed.attributes.prefixes = {{ .Values.spanNormalizerConfig.processor.allowedAttributesPrefixes | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "prefixedMatchedAllowedAttributes" }}
prefixed.matched.allowed.attributes = {{ .Values.spanNormalizerConfig.processor.prefixedMatchedAllowedAttributes | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "rootExitSpanDropCriterion" }}
rootExitSpanDropCriterion = {{ .Values.spanNormalizerConfig.processor.rootExitSpanDropCriterion | toJson }}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig.processor "excludeLogsTenantIds" }}
excludeLogsTenantIds = {{ .Values.spanNormalizerConfig.processor.excludeLogsTenantIds | toJson }}
{{- end }}
}
{{- end }}

{{- if hasKey .Values.spanNormalizerConfig "metrics" }}
metrics {
reporter {
names = {{- toJson .Values.spanNormalizerConfig.metrics.reporter.names | trim | nindent 12 }}
}
}
{{- end }}
clients {
config.service.config = {
host = {{ .Values.spanNormalizerConfig.configServiceHost }}
port = {{ .Values.spanNormalizerConfig.configServicePort }}
}
}
span.rules.exclude {
cache = {
refreshAfterWriteDuration = {{ .Values.spanNormalizerConfig.excludeSpanRulesConfig.cache.refreshAfterWriteDuration }}
expireAfterWriteDuration = {{ .Values.spanNormalizerConfig.excludeSpanRulesConfig.cache.expireAfterWriteDuration }}
}
}
{{- if hasKey .Values.spanNormalizerConfig "rateLimitConfig" }}
rate.limit.config = [
{{- range $k,$v := $.Values.spanNormalizerConfig.rateLimitConfig }}
{
tenantId = {{ $v.tenantId }}
groupingKey = {{ $v.groupingKey }}
maxSpansPerMinute = {{ $v.maxSpansPerMinute }}
},
{{- end }}
]
{{- end }}
16 changes: 16 additions & 0 deletions raw-spans-grouper/helm/values.yaml
Original file line number Diff line number Diff line change
@@ -134,6 +134,15 @@ rawSpansGrouperConfig:
groupby:
internal: 30

spanNormalizerConfig:
name: span-normalizer-config
excludeSpanRulesConfig:
cache:
refreshAfterWriteDuration: 3m
expireAfterWriteDuration: 5m
rateLimitConfig: []


logConfig:
name: raw-spans-grouper-log-config
monitorInterval: 30
@@ -166,6 +175,13 @@ kafka-topic-creator:
partitions: 8
configs:
cleanup.policy: "[compact, delete]"
raw-logs:
replicationFactor: 3
partitions: 8
configs:
retention.bytes: 8589934592 # default = -1
retention.ms: 86400000 # default = 604800000 (7 days)
max.message.bytes: 1048588 # default = 1048588

zookeeper:
address: zookeeper:2181
15 changes: 15 additions & 0 deletions raw-spans-grouper/raw-spans-grouper/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -33,6 +33,9 @@ dependencies {
because("https://snyk.io/vuln/SNYK-JAVA-ORGGLASSFISHJERSEYCORE-1255637")
}
implementation(project(":span-normalizer:span-normalizer-api"))
implementation(project(":span-normalizer:raw-span-constants"))
implementation(project(":span-normalizer:span-normalizer-constants"))
implementation(project(":semantic-convention-utils"))
implementation("org.hypertrace.core.datamodel:data-model:0.1.27")
implementation("org.hypertrace.core.serviceframework:platform-service-framework:0.1.49")
implementation("org.hypertrace.core.serviceframework:platform-metrics:0.1.49")
@@ -41,6 +44,18 @@ dependencies {
implementation("org.hypertrace.core.kafkastreams.framework:weighted-group-partitioner:0.2.6")
implementation("de.javakaffee:kryo-serializers:0.45")
implementation("com.google.guava:guava:31.1-jre")
implementation("org.apache.commons:commons-lang3:3.12.0")

// Required for the GRPC clients.
runtimeOnly("io.grpc:grpc-netty:1.50.0")
annotationProcessor("org.projectlombok:lombok:1.18.18")
compileOnly("org.projectlombok:lombok:1.18.18")

implementation("org.hypertrace.config.service:span-processing-config-service-api:0.1.47")
implementation("org.hypertrace.config.service:span-processing-utils:0.1.47")

implementation("org.hypertrace.core.grpcutils:grpc-client-utils:0.11.2")
implementation("org.hypertrace.core.grpcutils:grpc-context-utils:0.11.2")

// Logging
implementation("org.slf4j:slf4j-api:1.7.30")
Original file line number Diff line number Diff line change
@@ -6,13 +6,18 @@
import static org.hypertrace.core.rawspansgrouper.RawSpanGrouperConstants.RAW_SPANS_GROUPER_JOB_CONFIG;
import static org.hypertrace.core.rawspansgrouper.RawSpanGrouperConstants.SPAN_STATE_STORE_NAME;
import static org.hypertrace.core.rawspansgrouper.RawSpanGrouperConstants.TRACE_STATE_STORE;
import static org.hypertrace.core.spannormalizer.constants.SpanNormalizerConstants.BYPASS_OUTPUT_TOPIC_CONFIG_KEY;
import static org.hypertrace.core.spannormalizer.constants.SpanNormalizerConstants.OUTPUT_TOPIC_RAW_LOGS_CONFIG_KEY;

import com.typesafe.config.Config;
import io.jaegertracing.api_v2.JaegerSpanInternalModel;
import java.util.List;
import java.util.Map;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.Named;
import org.apache.kafka.streams.kstream.Produced;
@@ -29,6 +34,13 @@
import org.hypertrace.core.spannormalizer.SpanIdentity;
import org.hypertrace.core.spannormalizer.TraceIdentity;
import org.hypertrace.core.spannormalizer.TraceState;
import org.hypertrace.core.spannormalizer.jaeger.JaegerSpanPreProcessor;
import org.hypertrace.core.spannormalizer.jaeger.JaegerSpanSerde;
import org.hypertrace.core.spannormalizer.jaeger.JaegerSpanToAvroRawSpanTransformer;
import org.hypertrace.core.spannormalizer.jaeger.JaegerSpanToLogRecordsTransformer;
import org.hypertrace.core.spannormalizer.jaeger.PreProcessedSpan;
import org.hypertrace.core.spannormalizer.rawspan.ByPassPredicate;
import org.hypertrace.core.spannormalizer.rawspan.RawSpanToStructuredTraceTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@@ -44,17 +56,19 @@ public StreamsBuilder buildTopology(
Map<String, Object> properties,
StreamsBuilder streamsBuilder,
Map<String, KStream<?, ?>> inputStreams) {

Config jobConfig = getJobConfig(properties);
String inputTopic = jobConfig.getString(INPUT_TOPIC_CONFIG_KEY);
String outputTopic = jobConfig.getString(OUTPUT_TOPIC_CONFIG_KEY);
String bypassOutputTopic = jobConfig.getString(BYPASS_OUTPUT_TOPIC_CONFIG_KEY);
String outputTopicRawLogs = jobConfig.getString(OUTPUT_TOPIC_RAW_LOGS_CONFIG_KEY);

KStream<TraceIdentity, RawSpan> inputStream =
(KStream<TraceIdentity, RawSpan>) inputStreams.get(inputTopic);
KStream<byte[], JaegerSpanInternalModel.Span> inputStream =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Needs to be an incremental change. We need to read from both jaeger-spans and raw-spans-from-jaeger-spans topics in this PR.

Only in follow up PR some time down the line, we can just read from jaeger-spans

(KStream<byte[], JaegerSpanInternalModel.Span>) inputStreams.get(inputTopic);
if (inputStream == null) {
inputStream =
streamsBuilder
// read the input topic
.stream(inputTopic);
streamsBuilder.stream(
inputTopic, Consumed.with(Serdes.ByteArray(), new JaegerSpanSerde()));
inputStreams.put(inputTopic, inputStream);
}

@@ -75,7 +89,34 @@ public StreamsBuilder buildTopology(
streamsBuilder.addStateStore(spanStoreBuilder);
streamsBuilder.addStateStore(traceStateStoreBuilder);

StreamPartitioner<TraceIdentity, StructuredTrace> groupPartitioner =
KStream<byte[], PreProcessedSpan> preProcessedStream =
inputStream.transform(() -> new JaegerSpanPreProcessor(getGrpcChannelRegistry()));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How are we ensuring that this jaeger spans topic is consumed exactly from the offset where the span-normalizer got decommissioned?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. Being different applications, it will be difficult to get them in sync.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My suggestion would be the following:
We shouldn't even be reading from the existing jaeger-spans topic. This topology should read from a different topic(say jaeger-spans-v2). On the producer side(i.e. hypertrace-collector), we should have a way to control whether to produce to the existing jaeger-spans topic or to the new jaeger-spans-v2. Once we make the flip to the v2 topic we can decommission span-normalizer.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably need to keep both applications for now. And may need to move raw-spans-grouper logic to span-normalizer instead of the other way around.


// logs output
preProcessedStream.transform(JaegerSpanToLogRecordsTransformer::new).to(outputTopicRawLogs);

KStream<TraceIdentity, RawSpan>[] branches =
preProcessedStream
.transform(JaegerSpanToAvroRawSpanTransformer::new)
.branch(new ByPassPredicate(jobConfig), (key, value) -> true);

KStream<TraceIdentity, RawSpan> bypassTopicBranch = branches[0];
KStream<TraceIdentity, RawSpan> outputTopicBranch = branches[1];

StreamPartitioner<String, StructuredTrace> tenantIsolationPartitionerForBypassTopic =
new GroupPartitionerBuilder<String, StructuredTrace>()
.buildPartitioner(
"spans",
jobConfig,
(traceid, span) -> traceid,
new KeyHashPartitioner<>(),
getGrpcChannelRegistry());

bypassTopicBranch
.transform(RawSpanToStructuredTraceTransformer::new)
.to(bypassOutputTopic, Produced.with(null, null, tenantIsolationPartitionerForBypassTopic));

StreamPartitioner<TraceIdentity, StructuredTrace> tenantIsolationPartitionerForOutputTopic =
new GroupPartitionerBuilder<TraceIdentity, StructuredTrace>()
.buildPartitioner(
"spans",
@@ -85,10 +126,10 @@ public StreamsBuilder buildTopology(
getGrpcChannelRegistry());

Produced<TraceIdentity, StructuredTrace> outputTopicProducer =
Produced.with(null, null, groupPartitioner);
Produced.with(null, null, tenantIsolationPartitionerForOutputTopic);
outputTopicProducer = outputTopicProducer.withName(OUTPUT_TOPIC_PRODUCER);

inputStream
outputTopicBranch
.transform(
RawSpansProcessor::new,
Named.as(RawSpansProcessor.class.getSimpleName()),
Loading