From b992712caeca56edff96fccdc577a787db8e2b7a Mon Sep 17 00:00:00 2001 From: Simon Tien Date: Tue, 4 Feb 2025 12:28:54 +1100 Subject: [PATCH 1/4] feat: add leader node names to v1beta2.nodepool --- .../crds/apps.openyurt.io_nodepools.yaml | 12 +- pkg/apis/apps/v1beta2/nodepool_types.go | 11 +- .../apps/v1beta2/zz_generated.deepcopy.go | 17 ++- .../hubleader/hubleader_controller.go | 21 ++-- .../hubleader/hubleader_controller_test.go | 106 ++++++++++++++++-- 5 files changed, 144 insertions(+), 23 deletions(-) diff --git a/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml b/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml index 1139efd4461..781c2446ea6 100644 --- a/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml +++ b/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml @@ -464,7 +464,17 @@ spec: leaderEndpoints: description: LeaderEndpoints is used for storing the address of Leader Yurthub. items: - type: string + properties: + endpoint: + description: The address of the leader yurthub + type: string + nodeName: + description: The node name of the leader yurthub + type: string + required: + - endpoint + - nodeName + type: object type: array nodes: description: The list of nodes' names in the pool diff --git a/pkg/apis/apps/v1beta2/nodepool_types.go b/pkg/apis/apps/v1beta2/nodepool_types.go index 5a69aaaccc5..f01e3c39016 100644 --- a/pkg/apis/apps/v1beta2/nodepool_types.go +++ b/pkg/apis/apps/v1beta2/nodepool_types.go @@ -107,7 +107,7 @@ type NodePoolStatus struct { // LeaderEndpoints is used for storing the address of Leader Yurthub. // +optional - LeaderEndpoints []string `json:"leaderEndpoints,omitempty"` + LeaderEndpoints []Leader `json:"leaderEndpoints,omitempty"` // Conditions represents the latest available observations of a NodePool's // current state that includes LeaderHubElection status. @@ -115,6 +115,15 @@ type NodePoolStatus struct { Conditions []NodePoolCondition `json:"conditions,omitempty"` } +// Leader represents the hub leader in a nodepool +type Leader struct { + // The node name of the leader yurthub + NodeName string `json:"nodeName"` + + // The address of the leader yurthub + Endpoint string `json:"endpoint"` +} + // NodePoolConditionType represents a NodePool condition value. type NodePoolConditionType string diff --git a/pkg/apis/apps/v1beta2/zz_generated.deepcopy.go b/pkg/apis/apps/v1beta2/zz_generated.deepcopy.go index b4b76e08dc8..33d0a6444a0 100644 --- a/pkg/apis/apps/v1beta2/zz_generated.deepcopy.go +++ b/pkg/apis/apps/v1beta2/zz_generated.deepcopy.go @@ -26,6 +26,21 @@ import ( runtime "k8s.io/apimachinery/pkg/runtime" ) +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Leader) DeepCopyInto(out *Leader) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Leader. +func (in *Leader) DeepCopy() *Leader { + if in == nil { + return nil + } + out := new(Leader) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodePool) DeepCopyInto(out *NodePool) { *out = *in @@ -159,7 +174,7 @@ func (in *NodePoolStatus) DeepCopyInto(out *NodePoolStatus) { } if in.LeaderEndpoints != nil { in, out := &in.LeaderEndpoints, &out.LeaderEndpoints - *out = make([]string, len(*in)) + *out = make([]Leader, len(*in)) copy(*out, *in) } if in.Conditions != nil { diff --git a/pkg/yurtmanager/controller/hubleader/hubleader_controller.go b/pkg/yurtmanager/controller/hubleader/hubleader_controller.go index 96b06fe451d..09d00e7bcb2 100644 --- a/pkg/yurtmanager/controller/hubleader/hubleader_controller.go +++ b/pkg/yurtmanager/controller/hubleader/hubleader_controller.go @@ -179,9 +179,9 @@ func (r *ReconcileHubLeader) reconcileHubLeader(ctx context.Context, nodepool *a // Copy the nodepool to update updatedNodePool := nodepool.DeepCopy() - // Cache nodes in the list by internalIP -> Node + // Cache nodes in the list by Leader -> Node // if they are ready and have internal IP - endpointsMap := make(map[string]*corev1.Node) + endpointsMap := make(map[appsv1beta2.Leader]*corev1.Node) for _, n := range currentNodeList.Items { internalIP, ok := nodeutil.GetInternalIP(&n) if !ok { @@ -196,12 +196,15 @@ func (r *ReconcileHubLeader) reconcileHubLeader(ctx context.Context, nodepool *a continue } - endpointsMap[internalIP] = &n + endpointsMap[appsv1beta2.Leader{ + Endpoint: internalIP, + NodeName: n.Name, + }] = &n } // Delete leader endpoints that are not in endpoints map // They are either not ready or not longer the node list and need to be removed - leaderDeleteFn := func(endpoint string) bool { + leaderDeleteFn := func(endpoint appsv1beta2.Leader) bool { _, ok := endpointsMap[endpoint] return !ok } @@ -246,12 +249,12 @@ func (r *ReconcileHubLeader) reconcileHubLeader(ctx context.Context, nodepool *a } // hasLeadersChanged checks if the leader endpoints have changed -func hasLeadersChanged(old, new []string) bool { +func hasLeadersChanged(old, new []appsv1beta2.Leader) bool { if len(old) != len(new) { return true } - oldSet := make(map[string]struct{}, len(old)) + oldSet := make(map[appsv1beta2.Leader]struct{}, len(old)) for i := range old { oldSet[old[i]] = struct{}{} @@ -270,9 +273,9 @@ func hasLeadersChanged(old, new []string) bool { func electNLeaders( strategy string, numLeaders int, - candidates map[string]*corev1.Node, -) ([]string, bool) { - leaderEndpoints := make([]string, 0, len(candidates)) + candidates map[appsv1beta2.Leader]*corev1.Node, +) ([]appsv1beta2.Leader, bool) { + leaderEndpoints := make([]appsv1beta2.Leader, 0, len(candidates)) switch strategy { case string(appsv1beta2.ElectionStrategyMark), string(appsv1beta2.ElectionStrategyRandom): diff --git a/pkg/yurtmanager/controller/hubleader/hubleader_controller_test.go b/pkg/yurtmanager/controller/hubleader/hubleader_controller_test.go index 95d12b6c694..533b43f8acc 100644 --- a/pkg/yurtmanager/controller/hubleader/hubleader_controller_test.go +++ b/pkg/yurtmanager/controller/hubleader/hubleader_controller_test.go @@ -17,6 +17,7 @@ limitations under the License. package hubleader import ( + "cmp" "context" "slices" "testing" @@ -266,7 +267,12 @@ func TestReconcile(t *testing.T) { InterConnectivity: true, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.1"}, + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP", + Endpoint: "10.0.0.1", + }, + }, }, }, expectErr: false, @@ -306,7 +312,16 @@ func TestReconcile(t *testing.T) { LeaderReplicas: 2, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.2", "10.0.0.5"}, + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP and marked as leader", + Endpoint: "10.0.0.2", + }, + { + NodeName: "ready with internal IP and marked as 2nd leader", + Endpoint: "10.0.0.5", + }, + }, }, }, expectErr: false, @@ -465,7 +480,12 @@ func TestReconcile(t *testing.T) { InterConnectivity: true, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.2"}, // leader already set + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP and marked as leader", + Endpoint: "10.0.0.2", // leader already set + }, + }, }, }, expectedNodePool: &appsv1beta2.NodePool{ @@ -485,7 +505,12 @@ func TestReconcile(t *testing.T) { InterConnectivity: true, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.2"}, // should not change leader as replicas met + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP and marked as leader", + Endpoint: "10.0.0.2", // should not change leader as replicas met + }, + }, }, }, expectErr: false, @@ -508,7 +533,16 @@ func TestReconcile(t *testing.T) { InterConnectivity: true, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.2", "10.0.0.4"}, // .4 was leader (node not ready) + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP and marked as leader", + Endpoint: "10.0.0.2", + }, + { + NodeName: "not ready with internal IP marked as leader", + Endpoint: "10.0.0.4", // .4 was leader (node not ready) + }, + }, }, }, expectedNodePool: &appsv1beta2.NodePool{ @@ -528,7 +562,16 @@ func TestReconcile(t *testing.T) { InterConnectivity: true, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.2", "10.0.0.5"}, // new leader is .5 + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP and marked as leader", + Endpoint: "10.0.0.2", + }, + { + NodeName: "ready with internal IP and marked as 2nd leader", + Endpoint: "10.0.0.5", // new leader is .5 + }, + }, }, }, expectErr: false, @@ -568,7 +611,16 @@ func TestReconcile(t *testing.T) { InterConnectivity: true, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.2", "10.0.0.5"}, // multiple marked leaders + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP and marked as leader", + Endpoint: "10.0.0.2", + }, + { + NodeName: "ready with internal IP and marked as 2nd leader", + Endpoint: "10.0.0.5", + }, // multiple marked leaders + }, }, }, expectErr: false, @@ -602,7 +654,20 @@ func TestReconcile(t *testing.T) { InterConnectivity: true, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.2", "10.0.0.3", "10.0.0.5"}, // multiple marked leaders + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP and marked as leader", + Endpoint: "10.0.0.2", + }, + { + NodeName: "ready with internal IP and not marked as leader", + Endpoint: "10.0.0.3", + }, + { + NodeName: "ready with internal IP and marked as 2nd leader", + Endpoint: "10.0.0.5", + }, // multiple marked leaders, + }, }, }, expectErr: false, @@ -625,7 +690,16 @@ func TestReconcile(t *testing.T) { InterConnectivity: true, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.2", "10.0.0.5"}, // 2 leaders set, last should be dropped + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP and marked as leader", + Endpoint: "10.0.0.2", + }, + { + NodeName: "ready with internal IP and marked as 2nd leader", + Endpoint: "10.0.0.5", + }, // 2 leaders set, last should be dropped + }, }, }, expectedNodePool: &appsv1beta2.NodePool{ @@ -645,7 +719,12 @@ func TestReconcile(t *testing.T) { InterConnectivity: true, }, Status: appsv1beta2.NodePoolStatus{ - LeaderEndpoints: []string{"10.0.0.2"}, + LeaderEndpoints: []appsv1beta2.Leader{ + { + NodeName: "ready with internal IP and marked as leader", + Endpoint: "10.0.0.2", + }, + }, }, }, expectErr: false, @@ -682,7 +761,12 @@ func TestReconcile(t *testing.T) { // Reset resource version - it's not important for the test actualPool.ResourceVersion = "" // Sort leader endpoints for comparison - it is not important for the order - slices.Sort(actualPool.Status.LeaderEndpoints) + slices.SortStableFunc(actualPool.Status.LeaderEndpoints, func(a, b appsv1beta2.Leader) int { + return cmp.Compare( + a.Endpoint, + b.Endpoint, + ) + }) require.Equal(t, *tc.expectedNodePool, actualPool) }) From 74b6d822da3000af699f6cc5b7c1583fdd203a3b Mon Sep 17 00:00:00 2001 From: Simon Tien Date: Tue, 4 Feb 2025 12:35:54 +1100 Subject: [PATCH 2/4] fix: manifests --- charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml b/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml index 781c2446ea6..316d81bf114 100644 --- a/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml +++ b/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml @@ -464,6 +464,7 @@ spec: leaderEndpoints: description: LeaderEndpoints is used for storing the address of Leader Yurthub. items: + description: Leader represents the hub leader in a nodepool properties: endpoint: description: The address of the leader yurthub From 8136ddd2261c7bfd013feab27b6045dd2f9ff683 Mon Sep 17 00:00:00 2001 From: Simon Tien Date: Tue, 4 Feb 2025 14:11:45 +1100 Subject: [PATCH 3/4] fix: rename endpoint to address --- .../crds/apps.openyurt.io_nodepools.yaml | 4 +- pkg/apis/apps/v1beta2/nodepool_types.go | 2 +- .../hubleader/hubleader_controller.go | 18 ++++----- .../hubleader/hubleader_controller_test.go | 38 +++++++++---------- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml b/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml index 316d81bf114..a8f0798e4a2 100644 --- a/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml +++ b/charts/yurt-manager/crds/apps.openyurt.io_nodepools.yaml @@ -466,14 +466,14 @@ spec: items: description: Leader represents the hub leader in a nodepool properties: - endpoint: + address: description: The address of the leader yurthub type: string nodeName: description: The node name of the leader yurthub type: string required: - - endpoint + - address - nodeName type: object type: array diff --git a/pkg/apis/apps/v1beta2/nodepool_types.go b/pkg/apis/apps/v1beta2/nodepool_types.go index f01e3c39016..3722ba590ef 100644 --- a/pkg/apis/apps/v1beta2/nodepool_types.go +++ b/pkg/apis/apps/v1beta2/nodepool_types.go @@ -121,7 +121,7 @@ type Leader struct { NodeName string `json:"nodeName"` // The address of the leader yurthub - Endpoint string `json:"endpoint"` + Address string `json:"address"` } // NodePoolConditionType represents a NodePool condition value. diff --git a/pkg/yurtmanager/controller/hubleader/hubleader_controller.go b/pkg/yurtmanager/controller/hubleader/hubleader_controller.go index 09d00e7bcb2..51352cc5aea 100644 --- a/pkg/yurtmanager/controller/hubleader/hubleader_controller.go +++ b/pkg/yurtmanager/controller/hubleader/hubleader_controller.go @@ -181,7 +181,7 @@ func (r *ReconcileHubLeader) reconcileHubLeader(ctx context.Context, nodepool *a // Cache nodes in the list by Leader -> Node // if they are ready and have internal IP - endpointsMap := make(map[appsv1beta2.Leader]*corev1.Node) + leadersMap := make(map[appsv1beta2.Leader]*corev1.Node) for _, n := range currentNodeList.Items { internalIP, ok := nodeutil.GetInternalIP(&n) if !ok { @@ -196,16 +196,16 @@ func (r *ReconcileHubLeader) reconcileHubLeader(ctx context.Context, nodepool *a continue } - endpointsMap[appsv1beta2.Leader{ - Endpoint: internalIP, + leadersMap[appsv1beta2.Leader{ + Address: internalIP, NodeName: n.Name, }] = &n } - // Delete leader endpoints that are not in endpoints map + // Delete leaders that are not in leaders map // They are either not ready or not longer the node list and need to be removed - leaderDeleteFn := func(endpoint appsv1beta2.Leader) bool { - _, ok := endpointsMap[endpoint] + leaderDeleteFn := func(leader appsv1beta2.Leader) bool { + _, ok := leadersMap[leader] return !ok } updatedLeaders := slices.DeleteFunc(updatedNodePool.Status.LeaderEndpoints, leaderDeleteFn) @@ -214,13 +214,13 @@ func (r *ReconcileHubLeader) reconcileHubLeader(ctx context.Context, nodepool *a if len(updatedLeaders) < int(nodepool.Spec.LeaderReplicas) { // Remove current leaders from candidates for _, leader := range updatedLeaders { - delete(endpointsMap, leader) + delete(leadersMap, leader) } leaders, ok := electNLeaders( nodepool.Spec.LeaderElectionStrategy, int(nodepool.Spec.LeaderReplicas)-len(updatedLeaders), - endpointsMap, + leadersMap, ) if !ok { klog.Errorf("Failed to elect a leader for NodePool %s", nodepool.Name) @@ -275,7 +275,7 @@ func electNLeaders( numLeaders int, candidates map[appsv1beta2.Leader]*corev1.Node, ) ([]appsv1beta2.Leader, bool) { - leaderEndpoints := make([]appsv1beta2.Leader, 0, len(candidates)) + leaderEndpoints := make([]appsv1beta2.Leader, 0, numLeaders) switch strategy { case string(appsv1beta2.ElectionStrategyMark), string(appsv1beta2.ElectionStrategyRandom): diff --git a/pkg/yurtmanager/controller/hubleader/hubleader_controller_test.go b/pkg/yurtmanager/controller/hubleader/hubleader_controller_test.go index 533b43f8acc..eb997bb69d8 100644 --- a/pkg/yurtmanager/controller/hubleader/hubleader_controller_test.go +++ b/pkg/yurtmanager/controller/hubleader/hubleader_controller_test.go @@ -270,7 +270,7 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP", - Endpoint: "10.0.0.1", + Address: "10.0.0.1", }, }, }, @@ -315,11 +315,11 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP and marked as leader", - Endpoint: "10.0.0.2", + Address: "10.0.0.2", }, { NodeName: "ready with internal IP and marked as 2nd leader", - Endpoint: "10.0.0.5", + Address: "10.0.0.5", }, }, }, @@ -483,7 +483,7 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP and marked as leader", - Endpoint: "10.0.0.2", // leader already set + Address: "10.0.0.2", // leader already set }, }, }, @@ -508,7 +508,7 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP and marked as leader", - Endpoint: "10.0.0.2", // should not change leader as replicas met + Address: "10.0.0.2", // should not change leader as replicas met }, }, }, @@ -536,11 +536,11 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP and marked as leader", - Endpoint: "10.0.0.2", + Address: "10.0.0.2", }, { NodeName: "not ready with internal IP marked as leader", - Endpoint: "10.0.0.4", // .4 was leader (node not ready) + Address: "10.0.0.4", // .4 was leader (node not ready) }, }, }, @@ -565,11 +565,11 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP and marked as leader", - Endpoint: "10.0.0.2", + Address: "10.0.0.2", }, { NodeName: "ready with internal IP and marked as 2nd leader", - Endpoint: "10.0.0.5", // new leader is .5 + Address: "10.0.0.5", // new leader is .5 }, }, }, @@ -614,11 +614,11 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP and marked as leader", - Endpoint: "10.0.0.2", + Address: "10.0.0.2", }, { NodeName: "ready with internal IP and marked as 2nd leader", - Endpoint: "10.0.0.5", + Address: "10.0.0.5", }, // multiple marked leaders }, }, @@ -657,15 +657,15 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP and marked as leader", - Endpoint: "10.0.0.2", + Address: "10.0.0.2", }, { NodeName: "ready with internal IP and not marked as leader", - Endpoint: "10.0.0.3", + Address: "10.0.0.3", }, { NodeName: "ready with internal IP and marked as 2nd leader", - Endpoint: "10.0.0.5", + Address: "10.0.0.5", }, // multiple marked leaders, }, }, @@ -693,11 +693,11 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP and marked as leader", - Endpoint: "10.0.0.2", + Address: "10.0.0.2", }, { NodeName: "ready with internal IP and marked as 2nd leader", - Endpoint: "10.0.0.5", + Address: "10.0.0.5", }, // 2 leaders set, last should be dropped }, }, @@ -722,7 +722,7 @@ func TestReconcile(t *testing.T) { LeaderEndpoints: []appsv1beta2.Leader{ { NodeName: "ready with internal IP and marked as leader", - Endpoint: "10.0.0.2", + Address: "10.0.0.2", }, }, }, @@ -763,8 +763,8 @@ func TestReconcile(t *testing.T) { // Sort leader endpoints for comparison - it is not important for the order slices.SortStableFunc(actualPool.Status.LeaderEndpoints, func(a, b appsv1beta2.Leader) int { return cmp.Compare( - a.Endpoint, - b.Endpoint, + a.Address, + b.Address, ) }) From 4589beb185b9b98e3f2df58ea58a7c41e933f0d1 Mon Sep 17 00:00:00 2001 From: Simon Tien Date: Tue, 4 Feb 2025 18:06:18 +1100 Subject: [PATCH 4/4] fix: return if no candidates --- pkg/yurtmanager/controller/hubleader/hubleader_controller.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/yurtmanager/controller/hubleader/hubleader_controller.go b/pkg/yurtmanager/controller/hubleader/hubleader_controller.go index 51352cc5aea..3a70e860b6e 100644 --- a/pkg/yurtmanager/controller/hubleader/hubleader_controller.go +++ b/pkg/yurtmanager/controller/hubleader/hubleader_controller.go @@ -275,6 +275,11 @@ func electNLeaders( numLeaders int, candidates map[appsv1beta2.Leader]*corev1.Node, ) ([]appsv1beta2.Leader, bool) { + // No candidates to elect leaders from + if len(candidates) == 0 { + return nil, true + } + leaderEndpoints := make([]appsv1beta2.Leader, 0, numLeaders) switch strategy {