From 36eedc121bff8d83fc6a4590f468397f56d0bd14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20C=C3=B4t=C3=A9?= Date: Fri, 13 Sep 2024 14:40:29 -0400 Subject: [PATCH] Fix bug in calculating when ad-hoc tasks are out of attempts (#192907) In this PR, I'm fixing a bug where ad-hoc tasks would have one fewer attempts to retry in failure scenarios when using mget. ## To verify 1. Apply the following diff to your code ``` diff --git a/x-pack/plugins/stack_connectors/server/connector_types/server_log/index.ts b/x-pack/plugins/stack_connectors/server/connector_types/server_log/index.ts index 0275b2bdc2f..d481c3820a1 100644 --- a/x-pack/plugins/stack_connectors/server/connector_types/server_log/index.ts +++ b/x-pack/plugins/stack_connectors/server/connector_types/server_log/index.ts @@ -77,6 +77,10 @@ export function getConnectorType(): ServerLogConnectorType { async function executor( execOptions: ServerLogConnectorTypeExecutorOptions ): Promise> { + + console.log('*** Server log execution'); + throw new Error('Fail'); + const { actionId, params, logger } = execOptions; const sanitizedMessage = withoutControlCharacters(params.message); diff --git a/x-pack/plugins/task_manager/server/config.ts b/x-pack/plugins/task_manager/server/config.ts index db07494ef4f..07e277f8d16 100644 --- a/x-pack/plugins/task_manager/server/config.ts +++ b/x-pack/plugins/task_manager/server/config.ts @@ -202,7 +202,7 @@ export const configSchema = schema.object( max: 100, min: 1, }), - claim_strategy: schema.string({ defaultValue: CLAIM_STRATEGY_UPDATE_BY_QUERY }), + claim_strategy: schema.string({ defaultValue: CLAIM_STRATEGY_MGET }), request_timeouts: requestTimeoutsConfig, }, { diff --git a/x-pack/plugins/task_manager/server/lib/get_retry_at.ts b/x-pack/plugins/task_manager/server/lib/get_retry_at.ts index 278ba18642d..c8fb911d500 100644 --- a/x-pack/plugins/task_manager/server/lib/get_retry_at.ts +++ b/x-pack/plugins/task_manager/server/lib/get_retry_at.ts @@ -54,6 +54,7 @@ export function getRetryDate({ } export function calculateDelayBasedOnAttempts(attempts: number) { + return 10 * 1000; // Return 30s for the first retry attempt if (attempts === 1) { return 30 * 1000; ``` 2. Create an always firing rule that runs every hour, triggering a server log on check intervals 3. Let the rule run and observe the server log action running and failing three times (as compared to two) --- .../server/task_running/task_runner.test.ts | 26 +++++++++++++++++++ .../server/task_running/task_runner.ts | 4 +++ 2 files changed, 30 insertions(+) diff --git a/x-pack/plugins/task_manager/server/task_running/task_runner.test.ts b/x-pack/plugins/task_manager/server/task_running/task_runner.test.ts index fd9d8363eb5c2..35aa255341f97 100644 --- a/x-pack/plugins/task_manager/server/task_running/task_runner.test.ts +++ b/x-pack/plugins/task_manager/server/task_running/task_runner.test.ts @@ -2304,6 +2304,32 @@ describe('TaskManagerRunner', () => { expect(runner.isAdHocTaskAndOutOfAttempts).toEqual(true); }); + + it(`should return true if attempts = max attempts and in claiming status`, async () => { + const { runner } = await pendingStageSetup({ + instance: { + id: 'foo', + taskType: 'testbar', + attempts: 5, + status: TaskStatus.Claiming, + }, + }); + + expect(runner.isAdHocTaskAndOutOfAttempts).toEqual(true); + }); + + it(`should return false if attempts = max attempts and in running status`, async () => { + const { runner } = await pendingStageSetup({ + instance: { + id: 'foo', + taskType: 'testbar', + attempts: 5, + status: TaskStatus.Running, + }, + }); + + expect(runner.isAdHocTaskAndOutOfAttempts).toEqual(false); + }); }); describe('removeTask()', () => { diff --git a/x-pack/plugins/task_manager/server/task_running/task_runner.ts b/x-pack/plugins/task_manager/server/task_running/task_runner.ts index 002fcfec1a41e..f125aee5952ce 100644 --- a/x-pack/plugins/task_manager/server/task_running/task_runner.ts +++ b/x-pack/plugins/task_manager/server/task_running/task_runner.ts @@ -294,6 +294,10 @@ export class TaskManagerRunner implements TaskRunner { * running a task, the task should be deleted instead of ran. */ public get isAdHocTaskAndOutOfAttempts() { + if (this.instance.task.status === 'running') { + // This function gets called with tasks marked as running when using MGET, so attempts is already incremented + return !this.instance.task.schedule && this.instance.task.attempts > this.getMaxAttempts(); + } return !this.instance.task.schedule && this.instance.task.attempts >= this.getMaxAttempts(); }