From 645923274d20bf8d77fa10c9a1428e175eccca41 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Mon, 11 Nov 2024 08:01:18 -0500 Subject: [PATCH] fsm: fix bug in snapshot restore for removed timetable (#24412) When we removed the time table in #24112 we introduced a bug where if a previous version of Nomad had written a time table entry, we'd return from the restore loop early and never load the rest of the FSM. This will result in a mostly or partially wiped state for that Nomad node, which would then be out of sync with its peers (which would also have the same problem on upgrade). The bug only occurs when the FSM is being restored from snapshot, which isn't the case if you test with a server that's only written Raft logs and not snapshotted them. While fixing this bug, we still need to ensure we're reading the time table entries even if we're throwing them away, so that we move the snapshot reader along to the next full entry. Fixes: https://github.com/hashicorp/nomad/issues/24411 --- nomad/fsm.go | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/nomad/fsm.go b/nomad/fsm.go index 292e8808b85..2c91be65933 100644 --- a/nomad/fsm.go +++ b/nomad/fsm.go @@ -1564,9 +1564,13 @@ func (n *nomadFSM) restoreImpl(old io.ReadCloser, filter *FSMFilter) error { snapType := SnapshotType(msgType[0]) switch snapType { case TimeTableSnapshot: - // COMPAT: Nomad 1.9.2 removed the timetable, this case kept to gracefully handle - // tt snapshot requests - return nil + // COMPAT: Nomad 1.9.2 removed the timetable, this case kept to + // gracefully handle tt snapshot requests + var table []TimeTableEntry + if err := dec.Decode(&table); err != nil { + return err + } + case NodeSnapshot: node := new(structs.Node) if err := dec.Decode(node); err != nil { @@ -3311,3 +3315,10 @@ func (s SnapshotType) String() string { } return fmt.Sprintf("Unknown(%d)", s) } + +// TimeTableEntry was used to track a time and index, but has been removed. 
We +// still need to deserialize existing entries from older snapshots. +type TimeTableEntry struct { + Index uint64 + Time time.Time +}