Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PROF-9791: Use heuristics to start the profiler when requested through SSI #4322

Merged
merged 11 commits into from
May 30, 2024
Merged
127 changes: 104 additions & 23 deletions integration-tests/profiler/profiler.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,33 @@ if (process.platform !== 'win32') {
}

function checkProfiles (agent, proc, timeout,
expectedProfileTypes = DEFAULT_PROFILE_TYPES, expectBadExit = false, multiplicity = 1) {
expectedProfileTypes = DEFAULT_PROFILE_TYPES, expectBadExit = false, multiplicity = 1
) {
return Promise.all([
processExitPromise(proc, timeout, expectBadExit),
expectProfileMessagePromise(agent, timeout, expectedProfileTypes, multiplicity)
])
}

function expectProfileMessagePromise (agent, timeout,
expectedProfileTypes = DEFAULT_PROFILE_TYPES, multiplicity = 1
) {
const fileNames = expectedProfileTypes.map(type => `${type}.pprof`)
const resultPromise = agent.assertMessageReceived(({ headers, payload, files }) => {
assert.propertyVal(headers, 'host', `127.0.0.1:${agent.port}`)
assert.propertyVal(files[0], 'originalname', 'event.json')
const event = JSON.parse(files[0].buffer.toString())
assert.propertyVal(event, 'family', 'node')
assert.deepPropertyVal(event, 'attachments', fileNames)
for (const [index, fileName] of fileNames.entries()) {
assert.propertyVal(files[index + 1], 'originalname', fileName)
return agent.assertMessageReceived(({ headers, _, files }) => {
try {
assert.propertyVal(headers, 'host', `127.0.0.1:${agent.port}`)
assert.propertyVal(files[0], 'originalname', 'event.json')
const event = JSON.parse(files[0].buffer.toString())
assert.propertyVal(event, 'family', 'node')
assert.deepPropertyVal(event, 'attachments', fileNames)
for (const [index, fileName] of fileNames.entries()) {
assert.propertyVal(files[index + 1], 'originalname', fileName)
}
} catch (e) {
e.message += ` ${JSON.stringify({ headers, files })}`
throw e
}
}, timeout, multiplicity)

return Promise.all([processExitPromise(proc, timeout, expectBadExit), resultPromise])
}

function processExitPromise (proc, timeout, expectBadExit = false) {
Expand Down Expand Up @@ -73,6 +86,18 @@ async function getLatestProfile (cwd, pattern) {
return { profile: Profile.decode(pprofUnzipped), encoded: pprofGzipped.toString('base64') }
}

function expectTimeout (messagePromise, allowErrors = false) {
return messagePromise.then(
() => {
throw new Error('Received unexpected message')
}, (e) => {
if (e.message !== 'timeout' && (!allowErrors || !e.message.startsWith('timeout, additionally:'))) {
throw e
}
}
)
}

async function gatherNetworkTimelineEvents (cwd, scriptFilePath, eventType, args) {
const procStart = BigInt(Date.now() * 1000000)
const proc = fork(path.join(cwd, scriptFilePath), args, {
Expand Down Expand Up @@ -142,6 +167,7 @@ describe('profiler', () => {
let sandbox
let cwd
let profilerTestFile
let ssiTestFile
let oomTestFile
let oomEnv
let oomExecArgv
Expand All @@ -151,6 +177,7 @@ describe('profiler', () => {
sandbox = await createSandbox()
cwd = sandbox.folder
profilerTestFile = path.join(cwd, 'profiler/index.js')
ssiTestFile = path.join(cwd, 'profiler/ssi.js')
oomTestFile = path.join(cwd, 'profiler/oom.js')
oomExecArgv = ['--max-old-space-size=50']
})
Expand Down Expand Up @@ -355,19 +382,10 @@ describe('profiler', () => {
DD_PROFILING_ENABLED: 1
}
})
const checkTelemetry = agent.assertTelemetryReceived(({ headers, payload }) => {
}, 1000, 'generate-metrics')
const checkTelemetry = agent.assertTelemetryReceived(_ => {}, 1000, 'generate-metrics')
// SSI telemetry is not supposed to have been emitted when DD_INJECTION_ENABLED is absent,
// so throw if telemetry callback was invoked and do nothing if it timed out
const checkNoTelemetry = checkTelemetry.then(
() => {
throw new Error('Received unexpected metrics')
}, (e) => {
if (e.message !== 'timeout') {
throw e
}
})
return Promise.all([checkProfiles(agent, proc, timeout), checkNoTelemetry])
// so expect telemetry callback to time out
return Promise.all([checkProfiles(agent, proc, timeout), expectTimeout(checkTelemetry)])
})

it('records SSI telemetry on process exit', () => {
Expand Down Expand Up @@ -469,4 +487,67 @@ describe('profiler', () => {
})
}
})

context('SSI heuristics', () => {
beforeEach(async () => {
agent = await new FakeAgent().start()
})

afterEach(async () => {
proc.kill()
await agent.stop()
})

describe('does not trigger for', () => {
it('a short-lived app that creates no spans', () => {
return heuristicsDoesNotTriggerFor([], false)
})

it('a short-lived app that creates a span', () => {
return heuristicsDoesNotTriggerFor(['create-span'], true)
})

it('a long-lived app that creates no spans', () => {
return heuristicsDoesNotTriggerFor(['long-lived'], false)
})
})

it('triggers for long-lived span-creating app', () => {
return checkProfiles(agent,
forkSsi(['create-span', 'long-lived']),
timeout,
DEFAULT_PROFILE_TYPES,
false,
// Will receive 2 messages: first one is for the trace, second one is for the profile. We
// only need the assertions in checkProfiles to succeed for the one with the profile.
2)
})
})

function forkSsi (args) {
return fork(ssiTestFile, args, {
cwd,
env: {
DD_TRACE_AGENT_PORT: agent.port,
DD_INJECTION_ENABLED: 'profiler',
DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD: '1300'
}
})
}

function heuristicsDoesNotTriggerFor (args, allowTraceMessage) {
proc = fork(ssiTestFile, args, {
cwd,
env: {
DD_TRACE_AGENT_PORT: agent.port,
DD_INJECTION_ENABLED: 'profiler',
DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD: '1300'
}
})

return Promise.all([
processExitPromise(proc, timeout, false),
expectTimeout(expectProfileMessagePromise(agent, 1500), allowTraceMessage)
])
}
})
24 changes: 24 additions & 0 deletions integration-tests/profiler/ssi.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
'use strict'

const DDTrace = require('dd-trace')

const tracer = DDTrace.init()

async function run () {
const tasks = []
// If launched with 'create-span', the app will create a span.
if (process.argv.includes('create-span')) {
tasks.push(tracer.trace('woo', _ => {
return new Promise(setImmediate)
}))
}
// If launched with 'long-lived', the app will remain alive long enough to
// be considered long-lived by profiler activation heuristics.
if (process.argv.includes('long-lived')) {
const longLivedThreshold = Number(process.env.DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD)
tasks.push(new Promise(resolve => setTimeout(resolve, longLivedThreshold + 200)))
}
await Promise.all(tasks)
}

tracer.profilerStarted().then(run)
17 changes: 16 additions & 1 deletion packages/dd-trace/src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -485,9 +485,12 @@ class Config {
this._setValue(defaults, 'peerServiceMapping', {})
this._setValue(defaults, 'plugins', true)
this._setValue(defaults, 'port', '8126')
this._setValue(defaults, 'profiling.enabled', false)
this._setValue(defaults, 'profiling.enabled', undefined)
this._setValue(defaults, 'profiling.exporters', 'agent')
this._setValue(defaults, 'profiling.sourceMap', true)
this._setValue(defaults, 'profiling.ssi', false)
this._setValue(defaults, 'profiling.heuristicsEnabled', false)
this._setValue(defaults, 'profiling.shortLivedThreshold', undefined)
this._setValue(defaults, 'protocolVersion', '0.4')
this._setValue(defaults, 'queryStringObfuscation', qsRegex)
this._setValue(defaults, 'remoteConfig.enabled', true)
Expand Down Expand Up @@ -558,6 +561,7 @@ class Config {
DD_PROFILING_ENABLED,
DD_PROFILING_EXPORTERS,
DD_PROFILING_SOURCE_MAP,
DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD,
DD_REMOTE_CONFIGURATION_ENABLED,
DD_REMOTE_CONFIG_POLL_INTERVAL_SECONDS,
DD_RUNTIME_METRICS_ENABLED,
Expand Down Expand Up @@ -663,6 +667,17 @@ class Config {
this._setBoolean(env, 'profiling.enabled', coalesce(DD_EXPERIMENTAL_PROFILING_ENABLED, DD_PROFILING_ENABLED))
this._setString(env, 'profiling.exporters', DD_PROFILING_EXPORTERS)
this._setBoolean(env, 'profiling.sourceMap', DD_PROFILING_SOURCE_MAP && !isFalse(DD_PROFILING_SOURCE_MAP))
if (DD_INJECTION_ENABLED) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it ok to put logic here? this seems to stray from the simple this._setBoolean/Value sequence of statement.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think yes. There's some other if statements in the method already.

this._setBoolean(env, 'profiling.ssi', true)
if (DD_INJECTION_ENABLED.split(',').includes('profiler')) {
this._setBoolean(env, 'profiling.heuristicsEnabled', true)
}
if (DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Having two different naming for the same thing (long lived / short lived threshold) feels weird.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know – it already was there as "short lived" for telemetry, but we usually mentioned a "long-lived" process so this felt more natural. I was vacillating on renaming the internal variable but then thought I might do it in a separate step later to keep this smaller. I might add a commit to fix this though.

// This is only used in testing to not have to wait 30s
this._setValue(env, 'profiling.shortLivedThreshold', Number(DD_INTERNAL_PROFILING_LONG_LIVED_THRESHOLD))
}
}

this._setString(env, 'protocolVersion', DD_TRACE_AGENT_PROTOCOL_VERSION)
this._setString(env, 'queryStringObfuscation', DD_TRACE_OBFUSCATION_QUERY_STRING_REGEXP)
this._setBoolean(env, 'remoteConfig.enabled', coalesce(
Expand Down
3 changes: 2 additions & 1 deletion packages/dd-trace/src/profiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ process.once('beforeExit', () => { profiler.stop() })
module.exports = {
start: config => {
const { service, version, env, url, hostname, port, tags, repositoryUrl, commitSHA } = config
const { enabled, sourceMap, exporters } = config.profiling
const { enabled, sourceMap, exporters, heuristicsEnabled } = config.profiling
const logger = {
debug: (message) => log.debug(message),
info: (message) => log.info(message),
Expand All @@ -19,6 +19,7 @@ module.exports = {

return profiler.start({
enabled,
heuristicsEnabled,
service,
version,
env,
Expand Down
1 change: 1 addition & 0 deletions packages/dd-trace/src/profiling/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class Config {
DD_PROFILING_PPROF_PREFIX, '')

this.enabled = enabled
this.heuristicsEnabled = options.heuristicsEnabled
this.service = service
this.env = env
this.host = host
Expand Down
2 changes: 1 addition & 1 deletion packages/dd-trace/src/profiling/profiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class Profiler extends EventEmitter {
if (this._enabled) return true

const config = this._config = new Config(options)
if (!config.enabled) return false
if (!config.enabled && !config.heuristicsEnabled) return false

this._logger = config.logger
this._enabled = true
Expand Down
Loading
Loading