Skip to content

Commit

Permalink
Add alternative cpu profiler for jvms without jfr
Browse files Browse the repository at this point in the history
  • Loading branch information
laurit committed Jun 8, 2023
1 parent a62c21e commit 7f13711
Show file tree
Hide file tree
Showing 15 changed files with 383 additions and 253 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ public class Configuration implements AutoConfigurationCustomizerProvider {
private static final boolean DEFAULT_MEMORY_EVENT_RATE_LIMIT_ENABLED = true;

public static final String CONFIG_KEY_ENABLE_PROFILER = PROFILER_ENABLED_PROPERTY;
public static final String CONFIG_KEY_PROFILER_JFR = "splunk.profiler.jfr";
public static final String CONFIG_KEY_PROFILER_DIRECTORY = "splunk.profiler.directory";
public static final String CONFIG_KEY_RECORDING_DURATION = "splunk.profiler.recording.duration";
public static final String CONFIG_KEY_KEEP_FILES = "splunk.profiler.keep-files";
Expand Down Expand Up @@ -101,6 +102,10 @@ public static String getConfigUrl(ConfigProperties config) {
return config.getString(CONFIG_KEY_INGEST_URL, ingestUrl);
}

public static boolean getProfilerJfrEnabled(ConfigProperties config) {
return config.getBoolean(CONFIG_KEY_PROFILER_JFR, true);
}

public static boolean getTLABEnabled(ConfigProperties config) {
boolean memoryEnabled = config.getBoolean(CONFIG_KEY_MEMORY_ENABLED, DEFAULT_MEMORY_ENABLED);
return config.getBoolean(CONFIG_KEY_TLAB_ENABLED, memoryEnabled);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,12 @@
import com.splunk.opentelemetry.profiler.allocation.exporter.AllocationEventExporter;
import com.splunk.opentelemetry.profiler.allocation.exporter.PprofAllocationEventExporter;
import com.splunk.opentelemetry.profiler.context.SpanContextualizer;
import com.splunk.opentelemetry.profiler.events.EventPeriods;
import com.splunk.opentelemetry.profiler.contextstorage.JavaContextStorage;
import com.splunk.opentelemetry.profiler.exporter.CpuEventExporter;
import com.splunk.opentelemetry.profiler.exporter.PprofCpuEventExporter;
import com.splunk.opentelemetry.profiler.util.HelpfulExecutors;
import io.opentelemetry.api.logs.Logger;
import io.opentelemetry.api.trace.SpanContext;
import io.opentelemetry.javaagent.extension.AgentListener;
import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk;
import io.opentelemetry.sdk.autoconfigure.spi.ConfigProperties;
Expand All @@ -45,43 +46,103 @@
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Duration;
import java.time.Instant;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

@AutoService(AgentListener.class)
public class JfrActivator implements AgentListener {

private static final java.util.logging.Logger logger =
java.util.logging.Logger.getLogger(JfrActivator.class.getName());
private final ExecutorService executor = HelpfulExecutors.newSingleThreadExecutor("JFR Profiler");
private final ConfigurationLogger configurationLogger = new ConfigurationLogger();

@Override
public void afterAgent(AutoConfiguredOpenTelemetrySdk autoConfiguredOpenTelemetrySdk) {
ConfigProperties config = autoConfiguredOpenTelemetrySdk.getConfig();
if (notClearForTakeoff(config)) {
if (!config.getBoolean(CONFIG_KEY_ENABLE_PROFILER, false)) {
logger.fine("Profiler is not enabled.");
return;
}
boolean useJfr = Configuration.getProfilerJfrEnabled(config);
if (useJfr && !JFR.instance.isAvailable()) {
logger.fine(
"JDK Flight Recorder (JFR) is not available in this JVM, switching to java profiler.");
if (Configuration.getTLABEnabled(config)) {
logger.warning(
"JDK Flight Recorder (JFR) is not available in this JVM. Memory profiling is disabled.");
}
useJfr = false;
}

configurationLogger.log(config);
logger.info("Profiler is active.");
executor.submit(
logUncaught(
() -> activateJfrAndRunForever(config, autoConfiguredOpenTelemetrySdk.getResource())));
}

private boolean notClearForTakeoff(ConfigProperties config) {
if (!config.getBoolean(CONFIG_KEY_ENABLE_PROFILER, false)) {
logger.fine("Profiler is not enabled.");
return true;
if (useJfr) {
JfrProfiler.run(this, config, autoConfiguredOpenTelemetrySdk.getResource());
} else {
JavaProfiler.run(this, config, autoConfiguredOpenTelemetrySdk.getResource());
}
if (!JFR.instance.isAvailable()) {
logger.warning(
"JDK Flight Recorder (JFR) is not available in this JVM. Profiling is disabled.");
return true;
}

private static class JfrProfiler {
private static final ExecutorService executor =
HelpfulExecutors.newSingleThreadExecutor("JFR Profiler");

static void run(JfrActivator activator, ConfigProperties config, Resource resource) {
executor.submit(logUncaught(() -> activator.activateJfrAndRunForever(config, resource)));
}
}

return false;
private static class JavaProfiler {
private static final ScheduledExecutorService scheduler =
HelpfulExecutors.newSingleThreadedScheduledExecutor("Profiler scheduler");

static void run(JfrActivator activator, ConfigProperties config, Resource resource) {
int stackDepth = Configuration.getStackDepth(config);
LogRecordExporter logsExporter = LogExporterBuilder.fromConfig(config);
CpuEventExporter cpuEventExporter =
PprofCpuEventExporter.builder()
.otelLogger(
activator.buildOtelLogger(
SimpleLogRecordProcessor.create(logsExporter), resource))
.period(Configuration.getCallStackInterval(config))
.stackDepth(stackDepth)
.build();

Runnable profiler =
() -> {
Instant now = Instant.now();
Map<Thread, StackTraceElement[]> stackTracesMap;
Map<Thread, SpanContext> contextMap = new HashMap<>();
// disallow context changes while we are taking the thread dump
JavaContextStorage.block();
try {
stackTracesMap = Thread.getAllStackTraces();
// copy active context for each thread
for (Thread thread : stackTracesMap.keySet()) {
SpanContext spanContext = JavaContextStorage.activeContext.get(thread);
if (spanContext != null) {
contextMap.put(thread, spanContext);
}
}
} finally {
JavaContextStorage.unblock();
}
for (Map.Entry<Thread, StackTraceElement[]> entry : stackTracesMap.entrySet()) {
Thread thread = entry.getKey();
SpanContext spanContext = contextMap.get(thread);
cpuEventExporter.export(thread, entry.getValue(), now, spanContext);
}
cpuEventExporter.flush();
};
long period = Configuration.getCallStackInterval(config).toMillis();
scheduler.scheduleAtFixedRate(
logUncaught(() -> profiler.run()), period, period, TimeUnit.MILLISECONDS);
}
}

private boolean checkOutputDir(Path outputDir) {
Expand All @@ -107,7 +168,7 @@ private boolean checkOutputDir(Path outputDir) {
return true;
}

private void outdirWarn(Path dir, String suffix) {
private static void outdirWarn(Path dir, String suffix) {
logger.log(WARNING, "The configured output directory {0} {1}.", new Object[] {dir, suffix});
}

Expand All @@ -128,13 +189,12 @@ private void activateJfrAndRunForever(ConfigProperties config, Resource resource

EventReader eventReader = new EventReader();
SpanContextualizer spanContextualizer = new SpanContextualizer(eventReader);
EventPeriods periods = new EventPeriods(jfrSettings::get);
LogRecordExporter logsExporter = LogExporterBuilder.fromConfig(config);

CpuEventExporter cpuEventExporter =
PprofCpuEventExporter.builder()
.otelLogger(buildOtelLogger(SimpleLogRecordProcessor.create(logsExporter), resource))
.eventPeriods(periods)
.period(Configuration.getCallStackInterval(config))
.stackDepth(stackDepth)
.build();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,12 @@
package com.splunk.opentelemetry.profiler;

import static com.splunk.opentelemetry.profiler.Configuration.CONFIG_KEY_ENABLE_PROFILER;
import static com.splunk.opentelemetry.profiler.Configuration.CONFIG_KEY_PROFILER_JFR;
import static java.util.Collections.emptyMap;

import com.google.auto.service.AutoService;
import com.splunk.opentelemetry.profiler.contextstorage.JavaContextStorage;
import com.splunk.opentelemetry.profiler.contextstorage.JfrContextStorage;
import io.opentelemetry.context.ContextStorage;
import io.opentelemetry.sdk.autoconfigure.spi.AutoConfigurationCustomizer;
import io.opentelemetry.sdk.autoconfigure.spi.AutoConfigurationCustomizerProvider;
Expand All @@ -32,8 +35,12 @@ public class SdkCustomizer implements AutoConfigurationCustomizerProvider {
public void customize(AutoConfigurationCustomizer autoConfigurationCustomizer) {
autoConfigurationCustomizer.addPropertiesCustomizer(
config -> {
if (jfrIsAvailable() && jfrIsEnabledInConfig(config)) {
ContextStorage.addWrapper(JfrContextStorage::new);
if (profilerIsEnabledInConfig(config)) {
if (jfrIsAvailable() && jfrIsEnabledInConfig(config)) {
ContextStorage.addWrapper(JfrContextStorage::new);
} else {
ContextStorage.addWrapper(JavaContextStorage::new);
}
}
return emptyMap();
});
Expand All @@ -43,7 +50,11 @@ private boolean jfrIsAvailable() {
return JFR.instance.isAvailable();
}

private boolean jfrIsEnabledInConfig(ConfigProperties config) {
private boolean profilerIsEnabledInConfig(ConfigProperties config) {
return config.getBoolean(CONFIG_KEY_ENABLE_PROFILER, false);
}

private boolean jfrIsEnabledInConfig(ConfigProperties config) {
return config.getBoolean(CONFIG_KEY_PROFILER_JFR, true);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright Splunk Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.splunk.opentelemetry.profiler.contextstorage;

import io.opentelemetry.api.trace.Span;
import io.opentelemetry.context.Context;
import io.opentelemetry.context.ContextStorage;
import io.opentelemetry.context.Scope;
import javax.annotation.Nullable;

abstract class AbstractContextStorage implements ContextStorage {

private final ContextStorage delegate;
private final ThreadLocal<Span> activeSpan = ThreadLocal.withInitial(Span::getInvalid);

AbstractContextStorage(ContextStorage delegate) {
this.delegate = delegate;
}

@Override
public Scope attach(Context toAttach) {
Scope delegatedScope = delegate.attach(toAttach);
Span span = Span.fromContext(toAttach);
Span current = activeSpan.get();
// do nothing when active span didn't change
// do nothing if the span isn't sampled
if (span == current || !span.getSpanContext().isSampled()) {
return delegatedScope;
}

// mark new span as active and generate event
activeSpan.set(span);
activateSpan(span);
return () -> {
// restore previous active span
activeSpan.set(current);
activateSpan(current);
delegatedScope.close();
};
}

protected abstract void activateSpan(Span span);

@Nullable
@Override
public Context current() {
return delegate.current();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Copyright Splunk Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.splunk.opentelemetry.profiler.contextstorage;

import io.opentelemetry.api.trace.Span;
import io.opentelemetry.api.trace.SpanContext;
import io.opentelemetry.context.ContextStorage;
import io.opentelemetry.instrumentation.api.internal.cache.Cache;

// active context tracking for java profiler
public class JavaContextStorage extends AbstractContextStorage {

public static final Cache<Thread, SpanContext> activeContext = Cache.weak();
private static final Guard NOP = () -> {};
private static final BlockingGuard GUARD = new BlockingGuard();
private static volatile Guard guard = NOP;

public JavaContextStorage(ContextStorage delegate) {
super(delegate);
}

public static void block() {
guard = GUARD;
}

public static void unblock() {
guard = NOP;
GUARD.release();
}

@Override
protected void activateSpan(Span span) {
// when taking thread dump we block all thread that attempt to modify the active contexts
guard.stop();

SpanContext context = span.getSpanContext();
if (context.isValid()) {
activeContext.put(Thread.currentThread(), context);
} else {
activeContext.remove(Thread.currentThread());
}
}

private interface Guard {
void stop();
}

private static class BlockingGuard implements Guard {

@Override
public synchronized void stop() {
try {
while (guard == GUARD) {
wait();
}
} catch (InterruptedException exception) {
Thread.currentThread().interrupt();
}
}

synchronized void release() {
notifyAll();
}
}
}
Loading

0 comments on commit 7f13711

Please sign in to comment.