-
Notifications
You must be signed in to change notification settings - Fork 898
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
handle state machine deletion for state-based replication #7177
Changes from 6 commits
fb4f321
d65ca80
430fbe0
d20d766
e095370
276c3b5
d877cdb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -239,14 +239,15 @@ type ( | |
workflowTaskManager *workflowTaskStateMachine | ||
QueryRegistry QueryRegistry | ||
|
||
shard shard.Context | ||
clusterMetadata cluster.Metadata | ||
eventsCache events.Cache | ||
config *configs.Config | ||
timeSource clock.TimeSource | ||
logger log.Logger | ||
metricsHandler metrics.Handler | ||
stateMachineNode *hsm.Node | ||
shard shard.Context | ||
clusterMetadata cluster.Metadata | ||
eventsCache events.Cache | ||
config *configs.Config | ||
timeSource clock.TimeSource | ||
logger log.Logger | ||
metricsHandler metrics.Handler | ||
stateMachineNode *hsm.Node | ||
subStateMachineDeleted bool | ||
|
||
// Tracks all events added via the AddHistoryEvent method that is used by the state machine framework. | ||
currentTransactionAddedStateMachineEventTypes []enumspb.EventType | ||
|
@@ -4335,6 +4336,29 @@ func (ms *MutableStateImpl) AddWorkflowExecutionUpdateAdmittedEvent(request *upd | |
return event, nil | ||
} | ||
|
||
func (ms *MutableStateImpl) DeleteSubStateMachine(path *persistencespb.StateMachinePath) error { | ||
incomingPath := make([]hsm.Key, len(path.Path)) | ||
for i, p := range path.Path { | ||
incomingPath[i] = hsm.Key{Type: p.Type, ID: p.Id} | ||
} | ||
|
||
root := ms.HSM() | ||
node, err := root.Child(incomingPath) | ||
if err != nil { | ||
if !errors.Is(err, hsm.ErrStateMachineNotFound) { | ||
return err | ||
} | ||
// node is already deleted. | ||
return nil | ||
} | ||
err = node.Parent.DeleteChild(node.Key) | ||
if err != nil { | ||
return err | ||
} | ||
ms.subStateMachineDeleted = true | ||
return nil | ||
} | ||
|
||
// ApplyWorkflowExecutionUpdateAdmittedEvent applies a WorkflowExecutionUpdateAdmittedEvent to mutable state. | ||
func (ms *MutableStateImpl) ApplyWorkflowExecutionUpdateAdmittedEvent(event *historypb.HistoryEvent, batchId int64) error { | ||
attrs := event.GetWorkflowExecutionUpdateAdmittedEventAttributes() | ||
|
@@ -6290,6 +6314,7 @@ func (ms *MutableStateImpl) cleanupTransaction() error { | |
ms.timerInfosUserDataUpdated = make(map[string]struct{}) | ||
ms.activityInfosUserDataUpdated = make(map[int64]struct{}) | ||
ms.reapplyEventsCandidate = nil | ||
ms.subStateMachineDeleted = false | ||
|
||
ms.stateInDB = ms.executionState.State | ||
ms.nextEventIDInDB = ms.GetNextEventID() | ||
|
@@ -7351,16 +7376,9 @@ func (ms *MutableStateImpl) syncExecutionInfo(current *persistencespb.WorkflowEx | |
} | ||
|
||
func (ms *MutableStateImpl) syncSubStateMachinesByType(incoming map[string]*persistencespb.StateMachineMap) error { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @yycptt do you see an issue with this? I assume that we always just want to do a full sync here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm are we still using the We can also remove the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. when source cluster is generating snapshot, the TransitionCount is reset to 1. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the code, looks like @bergundy can you confirm? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. PopTask() removed. |
||
// check if there is node been deleted | ||
currentHSM := ms.HSM() | ||
|
||
// we don't care about the root here which is the entire mutable state | ||
incomingHSM, err := hsm.NewRoot( | ||
ms.shard.StateMachineRegistry(), | ||
StateMachineType, | ||
ms, | ||
incoming, | ||
ms, | ||
) | ||
incomingHSM, err := hsm.NewRoot(ms.shard.StateMachineRegistry(), StateMachineType, ms, incoming, ms) | ||
if err != nil { | ||
return err | ||
} | ||
|
@@ -7370,22 +7388,19 @@ func (ms *MutableStateImpl) syncSubStateMachinesByType(incoming map[string]*pers | |
// skip root which is the entire mutable state | ||
return nil | ||
} | ||
|
||
incomingNodePath := incomingNode.Path() | ||
currentNode, err := currentHSM.Child(incomingNodePath) | ||
if err != nil { | ||
// 1. Already done history resend if needed before, | ||
// and node creation today always associated with an event | ||
// 2. Node deletion is not supported right now. | ||
// Based on 1 and 2, node should always be found here. | ||
return err | ||
_, err := currentHSM.Child(incomingNodePath) | ||
if err != nil && errors.Is(err, hsm.ErrStateMachineNotFound) { | ||
ms.subStateMachineDeleted = true | ||
return nil | ||
} | ||
|
||
return currentNode.Sync(incomingNode) | ||
return err | ||
}); err != nil { | ||
return err | ||
} | ||
|
||
ms.executionInfo.SubStateMachinesByType = incoming | ||
ms.mustInitHSM() | ||
Comment on lines
+7402
to
+7403
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please make sure that we regenerate the relevant timers from mutable state and delete any timers for state machines that were deleted during the full sync. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Updated as discussed offline. |
||
return nil | ||
} | ||
|
||
|
@@ -7417,6 +7432,8 @@ func (ms *MutableStateImpl) applyTombstones(tombstoneBatches []*persistencespb.S | |
if _, ok := ms.pendingSignalInfoIDs[tombstone.GetSignalExternalInitiatedEventId()]; ok { | ||
err = ms.DeletePendingSignal(tombstone.GetSignalExternalInitiatedEventId()) | ||
} | ||
case *persistencespb.StateMachineTombstone_StateMachinePath: | ||
err = ms.DeleteSubStateMachine(tombstone.GetStateMachinePath()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. also need to I have a TODO in task_refresher.go about this (~L660). Probably need to find a way to trigger the trim only once. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to trim every time when we close a transaction, @justinp-tt is working on it. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add a flag to track if subStateMachine is deleted. |
||
default: | ||
// TODO: updateID and stateMachinePath | ||
err = serviceerror.NewInternal("unknown tombstone type") | ||
|
@@ -7563,3 +7580,7 @@ func (ms *MutableStateImpl) AddReapplyCandidateEvent(event *historypb.HistoryEve | |
func (ms *MutableStateImpl) GetReapplyCandidateEvents() []*historypb.HistoryEvent { | ||
return ms.reapplyEventsCandidate | ||
} | ||
|
||
func (ms *MutableStateImpl) IsSubStateMachineDeleted() bool { | ||
return ms.subStateMachineDeleted | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,6 +28,7 @@ import ( | |
"time" | ||
|
||
persistencespb "go.temporal.io/server/api/persistence/v1" | ||
"go.temporal.io/server/common/util" | ||
"go.temporal.io/server/service/history/hsm" | ||
"go.temporal.io/server/service/history/tasks" | ||
"google.golang.org/protobuf/types/known/timestamppb" | ||
|
@@ -36,10 +37,16 @@ import ( | |
// AddNextStateMachineTimerTask generates a state machine timer task if the first deadline doesn't have a task scheduled | ||
// yet. | ||
func AddNextStateMachineTimerTask(ms MutableState) { | ||
timers := ms.GetExecutionInfo().StateMachineTimers | ||
// filter out empty timer groups | ||
timers := util.FilterSlice(ms.GetExecutionInfo().StateMachineTimers, func(timerGroup *persistencespb.StateMachineTimerGroup) bool { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: I think you can use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. updated |
||
return len(timerGroup.Infos) > 0 | ||
}) | ||
ms.GetExecutionInfo().StateMachineTimers = timers | ||
|
||
if len(timers) == 0 { | ||
return | ||
} | ||
|
||
timerGroup := timers[0] | ||
// We already have a timer for this deadline. | ||
if timerGroup.Scheduled { | ||
|
@@ -122,7 +129,9 @@ func TrimStateMachineTimers( | |
|
||
trimmedTaskInfos = append(trimmedTaskInfos, taskInfo) | ||
} | ||
if len(trimmedTaskInfos) > 0 { | ||
if len(trimmedTaskInfos) > 0 || timerGroup.Scheduled { | ||
// We still want to keep the timer group if it has been scheduled even if it has no task info. | ||
// This will prevent us from scheduling a new timer task for the same group. | ||
trimmedStateMachineTimers = append(trimmedStateMachineTimers, &persistencespb.StateMachineTimerGroup{ | ||
Infos: trimmedTaskInfos, | ||
Deadline: timerGroup.Deadline, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't see this being reset anywhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is reset in cleanupTransaction().