From 892c3e54e22b4d77f7769ccafc6b1c2fea3ce8c0 Mon Sep 17 00:00:00 2001
From: Michael Smithhisler
Date: Thu, 7 Nov 2024 16:27:22 -0500
Subject: [PATCH] fix: handle template re-renders on client restart

When multiple templates with api functions are included in a task, it's
possible for consul-template to re-render templates as it creates
watchers, overwriting render event data. This change uses event fields
that do not get overwritten, and only executes the change mode for
templates that were actually written to disk.
---
 .../taskrunner/template/template.go           | 20 +++---
 .../taskrunner/template/template_test.go      | 63 +++++++++++++++++++
 2 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go
index ca99b01a652..a2da609806d 100644
--- a/client/allocrunner/taskrunner/template/template.go
+++ b/client/allocrunner/taskrunner/template/template.go
@@ -277,6 +277,10 @@ func (tm *TaskTemplateManager) handleFirstRender() {
 		<-eventTimer.C
 	}
 
+	// dirtyEvents are events that actually rendered to disk and need to trigger
+	// their respective change_mode operation
+	dirtyEvents := map[string]*manager.RenderEvent{}
+
 	// outstandingEvent tracks whether there is an outstanding event that should
 	// be fired.
 	outstandingEvent := false
@@ -308,22 +312,22 @@ WAIT:
 				continue
 			}
 
-			dirty := false
 			for _, event := range events {
 				// This template hasn't been rendered
 				if event.LastWouldRender.IsZero() {
 					continue WAIT
 				}
-				if event.WouldRender && event.DidRender {
-					dirty = true
+				// If the template _actually_ rendered to disk, mark it dirty
+				if !event.LastDidRender.IsZero() {
+					dirtyEvents[event.Template.ID()] = event
 				}
 			}
 
 			// if there's a driver handle then the task is already running and
 			// that changes how we want to behave on first render
-			if dirty && tm.config.Lifecycle.IsRunning() {
+			if len(dirtyEvents) > 0 && tm.config.Lifecycle.IsRunning() {
 				handledRenders := make(map[string]time.Time, len(tm.config.Templates))
-				tm.onTemplateRendered(handledRenders, time.Time{})
+				tm.onTemplateRendered(handledRenders, time.Time{}, dirtyEvents)
 			}
 
 			break WAIT
@@ -417,12 +421,13 @@ func (tm *TaskTemplateManager) handleTemplateRerenders(allRenderedTime time.Time
 					SetFailsTask().
 					SetDisplayMessage(fmt.Sprintf("Template failed: %v", err)))
 		case <-tm.runner.TemplateRenderedCh():
-			tm.onTemplateRendered(handledRenders, allRenderedTime)
+			events := tm.runner.RenderEvents()
+			tm.onTemplateRendered(handledRenders, allRenderedTime, events)
 		}
 	}
 }
 
-func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time.Time, allRenderedTime time.Time) {
+func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time.Time, allRenderedTime time.Time, events map[string]*manager.RenderEvent) {
 
 	var handling []string
 	signals := make(map[string]struct{})
@@ -430,7 +435,6 @@ func (tm *TaskTemplateManager) onTemplateRendered(handledRenders map[string]time
 	restart := false
 	var splay time.Duration
 
-	events := tm.runner.RenderEvents()
 	for id, event := range events {
 
 		// First time through
diff --git a/client/allocrunner/taskrunner/template/template_test.go b/client/allocrunner/taskrunner/template/template_test.go
index 4811e364a5b..9ec11b13a56 100644
--- a/client/allocrunner/taskrunner/template/template_test.go
+++ b/client/allocrunner/taskrunner/template/template_test.go
@@ -806,6 +806,69 @@ OUTER:
 	}
 }
 
+// Tests an edge case where a task has multiple templates and the client is restarted.
+// In this case, vault may re-render and overwrite some fields in the first render event
+// but we still want to make sure it causes a restart.
+// We cannot control the order in which these templates are rendered, so this test will
+// exhibit flakiness if this edge case is not properly handled.
+func TestTaskTemplateManager_FirstRender_MultiSecret(t *testing.T) {
+	ci.Parallel(t)
+	clienttestutil.RequireVault(t)
+
+	// Make a template that will render based on a key in Vault
+	vaultPath := "secret/data/restart"
+	key := "shouldRestart"
+	content := "shouldRestart"
+	embedded := fmt.Sprintf(`{{with secret "%s"}}{{.Data.data.%s}}{{end}}`, vaultPath, key)
+	file := "my.tmpl"
+	template := &structs.Template{
+		EmbeddedTmpl: embedded,
+		DestPath:     file,
+		ChangeMode:   structs.TemplateChangeModeRestart,
+	}
+
+	vaultPath2 := "secret/data/noop"
+	key2 := "noop"
+	content2 := "noop"
+	embedded2 := fmt.Sprintf(`{{with secret "%s"}}{{.Data.data.%s}}{{end}}`, vaultPath2, key2)
+	file2 := "my.tmpl2"
+	template2 := &structs.Template{
+		EmbeddedTmpl: embedded2,
+		DestPath:     file2,
+		ChangeMode:   structs.TemplateChangeModeNoop,
+	}
+
+	harness := newTestHarness(t, []*structs.Template{template, template2}, false, true)
+
+	// Write the secret to Vault
+	logical := harness.vault.Client.Logical()
+	_, err := logical.Write(vaultPath, map[string]interface{}{"data": map[string]interface{}{key: content}})
+	must.NoError(t, err)
+	_, err = logical.Write(vaultPath2, map[string]interface{}{"data": map[string]interface{}{key2: content2}})
+	must.NoError(t, err)
+
+	// simulate task is running already
+	harness.mockHooks.HasHandle = true
+
+	harness.start(t)
+	defer harness.stop()
+
+	// Wait for the unblock
+	select {
+	case <-harness.mockHooks.UnblockCh:
+	case <-time.After(time.Duration(5*testutil.TestMultiplier()) * time.Second):
+		t.Fatal("Task unblock should have been called")
+	}
+
+	select {
+	case <-harness.mockHooks.RestartCh:
+	case <-harness.mockHooks.SignalCh:
+		t.Fatal("should not have received signal", harness.mockHooks)
+	case <-time.After(time.Duration(1*testutil.TestMultiplier()) * time.Second):
+		t.Fatal("should have restarted")
+	}
+}
+
 func TestTaskTemplateManager_Rerender_Noop(t *testing.T) {
 	ci.Parallel(t)
 	clienttestutil.RequireConsul(t)
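
Editor's note (not part of the patch): the sketch below is a minimal, self-contained illustration of why the fix keys off the cumulative LastDidRender timestamp rather than the per-run WouldRender/DidRender flags that a later re-render can overwrite. The renderEvent type and the sample data are hypothetical stand-ins for the handful of consul-template RenderEvent fields the patch touches; they are not the real manager API, and the scenario is only the one described in the commit message.

package main

import (
	"fmt"
	"time"
)

// renderEvent is a hypothetical stand-in for the fields of
// consul-template's manager.RenderEvent that the patch relies on.
type renderEvent struct {
	WouldRender     bool      // per-run flag; can be overwritten by a later re-render
	DidRender       bool      // per-run flag; can be overwritten by a later re-render
	LastWouldRender time.Time // cumulative; set once the template could render
	LastDidRender   time.Time // cumulative; set once the template hit disk
}

func main() {
	now := time.Now()

	// Template "a" rendered to disk on the first pass, but a re-render fired
	// while watchers were still being created overwrote its per-run flags.
	// Template "b" never rendered to disk.
	events := map[string]*renderEvent{
		"a": {LastWouldRender: now, LastDidRender: now},
		"b": {WouldRender: true, LastWouldRender: now},
	}

	// Old check: misses template "a" because DidRender was overwritten,
	// so no change_mode would run on first render.
	oldDirty := false
	for _, ev := range events {
		if ev.WouldRender && ev.DidRender {
			oldDirty = true
		}
	}

	// New check: the timestamp is never zeroed once a template has been
	// written to disk, so template "a" is still collected as dirty.
	dirty := map[string]*renderEvent{}
	for id, ev := range events {
		if !ev.LastDidRender.IsZero() {
			dirty[id] = ev
		}
	}

	fmt.Println("old check would trigger change_mode:", oldDirty) // false
	fmt.Println("new check marks templates dirty:", len(dirty))   // 1
}

Running the sketch shows the old check reporting false while the new check still flags one dirty template, which mirrors the restart-on-first-render behavior the added test exercises with two Vault templates.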