Skip to content

Commit

Permalink
fix: switchover failed (#546)
Browse files Browse the repository at this point in the history
Co-authored-by: wangyelei <[email protected]>
(cherry picked from commit 99ff4bf)
  • Loading branch information
wangyelei committed Feb 6, 2025
1 parent 4d5471f commit f147a40
Show file tree
Hide file tree
Showing 9 changed files with 64 additions and 44 deletions.
1 change: 1 addition & 0 deletions docs/user_docs/cli/kbcli_cluster_promote.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ kbcli cluster promote NAME [--instance <instance-name>] [flags]
--edit Edit the API resource before creating
--force skip the pre-checks of the opsRequest to run the opsRequest forcibly
-h, --help help for promote
--instance string Specify the instance name that will transfer its role to the candidate pod, If not set, the current primary or leader of the cluster will be used.
--name string OpsRequest name. if not specified, it will be randomly generated
-o, --output format Prints the output in the specified format. Allowed values: JSON and YAML (default yaml)
--ttlSecondsAfterSucceed int Time to live after the OpsRequest succeed
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ require (
github.com/Masterminds/semver/v3 v3.3.0
github.com/NimbleMarkets/ntcharts v0.1.2
github.com/apecloud/dbctl v0.0.0-20240827084000-68a1980b1a46
github.com/apecloud/kubeblocks v1.0.0-beta.23
github.com/apecloud/kubeblocks v1.0.0-beta.25
github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2
github.com/briandowns/spinner v1.23.0
github.com/chaos-mesh/chaos-mesh/api v0.0.0-20230912020346-a5d89c1c90ad
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -677,8 +677,8 @@ github.com/apache/arrow/go/v11 v11.0.0/go.mod h1:Eg5OsL5H+e299f7u5ssuXsuHQVEGC4x
github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU=
github.com/apecloud/dbctl v0.0.0-20240827084000-68a1980b1a46 h1:+Jcc7IjDGxPgIfIkGX2Q5Yxj35U65zgcfjh0B9rDhjo=
github.com/apecloud/dbctl v0.0.0-20240827084000-68a1980b1a46/go.mod h1:eksJtZ7z1nVcVLqDzAdcN5EfpHwXllIAvHZEks2zWys=
github.com/apecloud/kubeblocks v1.0.0-beta.23 h1:JrQBB9gJ/jtMD9wHv5js26rdfNQgeoU5+GcUEiaAmFU=
github.com/apecloud/kubeblocks v1.0.0-beta.23/go.mod h1:b656nTyvHhwRwOuwNpOPG87Q0Lba3ygGRWoSOacPt5o=
github.com/apecloud/kubeblocks v1.0.0-beta.25 h1:wWnvGSt/0emzvnbKSIVUQ5/nB9IeRrsgtsqFsu2Jfis=
github.com/apecloud/kubeblocks v1.0.0-beta.25/go.mod h1:b656nTyvHhwRwOuwNpOPG87Q0Lba3ygGRWoSOacPt5o=
github.com/apparentlymart/go-textseg v1.0.0/go.mod h1:z96Txxhf3xSFMPmb5X/1W05FF/Nj9VFpLOpjS5yuumk=
github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
Expand Down
2 changes: 1 addition & 1 deletion pkg/action/template/cluster_operations_template.cue
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ content: {
switchover: [{
componentObjectName: options.componentObjectName
instanceName: options.instance
candidateName: options.instance
candidateName: options.candidate
}]
}
if options.type == "RebuildInstance" {
Expand Down
1 change: 0 additions & 1 deletion pkg/cluster/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,6 @@ func (o *ClusterObjects) GetInstanceInfo() []*InstanceInfo {
Component: componentName,
Status: o.getPodPhase(&pod),
Role: getLabelVal(pod.Labels, constant.RoleLabelKey),
AccessMode: getLabelVal(pod.Labels, constant.AccessModeLabelKey),
CreatedTime: util.TimeFormat(&pod.CreationTimestamp),
}
var componentSpec *kbappsv1.ClusterComponentSpec
Expand Down
56 changes: 40 additions & 16 deletions pkg/cmd/cluster/operations.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ package cluster
import (
"context"
"fmt"
"math"
"strings"

appsv1 "github.com/apecloud/kubeblocks/apis/apps/v1"
Expand Down Expand Up @@ -111,6 +112,7 @@ type OperationsOptions struct {
Component string `json:"component"`
ComponentObjectName string `json:"componentObjectName,omitempty"`
Instance string `json:"instance"`
Candidate string `json:"candidate"`
BackupName string `json:"-"`
Inplace bool `json:"-"`
InstanceNames []string `json:"-"`
Expand Down Expand Up @@ -438,54 +440,75 @@ func (o *OperationsOptions) validatePromote(clusterObj *appsv1.Cluster) error {
podObj = &corev1.Pod{}
)

if o.Instance == "" {
if o.Candidate == "" {
return fmt.Errorf("--candidate is required")
}
// if the instance is not specified, do not need to check the validity of the instance
// checks the validity of the instance whether it belongs to the current component and ensure it is not the primary or leader instance currently.
podKey := client.ObjectKey{
Namespace: clusterObj.Namespace,
Name: o.Instance,
Name: o.Candidate,
}
if err := util.GetResourceObjectFromGVR(types.PodGVR(), podKey, o.Dynamic, podObj); err != nil || podObj == nil {
return fmt.Errorf("instance %s not found, please check the validity of the instance using \"kbcli cluster list-instances\"", o.Instance)
return fmt.Errorf("instance %s not found, please check the validity of the instance using \"kbcli cluster list-instances\"", o.Candidate)
}
o.ComponentObjectName = constant.GenerateClusterComponentName(clusterObj.Name, podObj.Labels[constant.KBAppComponentLabelKey])

getAndValidatePod := func(targetRoles ...string) error {
// if the instance is not specified, do not need to check the validity of the instance
if o.Instance == "" {
if o.Candidate == "" {
return nil
}
v, ok := podObj.Labels[constant.RoleLabelKey]
if !ok || v == "" {
return fmt.Errorf("instance %s cannot be promoted because it had a invalid role label", o.Instance)
return fmt.Errorf("instance %s cannot be promoted because it had a invalid role label", o.Candidate)
}
for _, targetRole := range targetRoles {
if v == targetRole {
return fmt.Errorf("instanceName %s cannot be promoted because it is already the targetRole %s instance", o.Instance, targetRole)
return fmt.Errorf("instanceName %s cannot be promoted because it is already the targetRole %s instance", o.Candidate, targetRole)
}
}
if podObj.Labels[constant.AppInstanceLabelKey] != clusterObj.Name {
return fmt.Errorf("instanceName %s does not belong to the current cluster, please check the validity of the instance using \"kbcli cluster list-instances\"", o.Instance)
return fmt.Errorf("instanceName %s does not belong to the current cluster, please check the validity of the instance using \"kbcli cluster list-instances\"", o.Candidate)
}
return nil
}

getTargetRole := func(roles []appsv1.ReplicaRole) (string, error) {
pods, err := o.Client.CoreV1().Pods(clusterObj.Namespace).List(context.Background(), metav1.ListOptions{
LabelSelector: fmt.Sprintf("%s=%s", constant.KBAppComponentLabelKey, podObj.Labels[constant.KBAppComponentLabelKey]),
})
if err != nil {
return "", err
}
if o.Instance != "" {
for _, pod := range pods.Items {
if pod.Name == o.Instance {
return pod.Labels[constant.RoleLabelKey], nil
}
}
return "", fmt.Errorf("instance %s not found, please check the validity of the instance using \"kbcli cluster list-instances\"", o.Instance)
}
targetRole := ""
if len(roles) == 0 {
return targetRole, fmt.Errorf("component has no roles definition, does not support switchover")
}
for _, role := range roles {
if role.Serviceable && role.Writable {
if targetRole != "" {
return targetRole, fmt.Errorf("componentDefinition has more than role is serviceable and writable, does not support switchover")
}
targetRole = role.Name
// HACK: assume the role with highest priority to be writable
highestPriority := math.MinInt
var role *appsv1.ReplicaRole
for _, r := range compDefObj.Spec.Roles {
if r.UpdatePriority > highestPriority {
highestPriority = r.UpdatePriority
role = &r
}
}
for _, pod := range pods.Items {
if pod.Labels[constant.RoleLabelKey] == role.Name {
o.Instance = pod.Name
break
}
}
return targetRole, nil
return role.Name, nil
}

// check componentDefinition exist
Expand All @@ -497,7 +520,7 @@ func (o *OperationsOptions) validatePromote(clusterObj *appsv1.Cluster) error {
return err
}
if compDefObj.Spec.LifecycleActions == nil || compDefObj.Spec.LifecycleActions.Switchover == nil {
return fmt.Errorf(`this instance "%s does not support switchover, you can define the switchover action in the componentDef "%s"`, o.Instance, compDefKey.Name)
return fmt.Errorf(`this instance "%s does not support switchover, you can define the switchover action in the componentDef "%s"`, o.Candidate, compDefKey.Name)
}
targetRole, err := getTargetRole(compDefObj.Spec.Roles)
if err != nil {
Expand Down Expand Up @@ -1008,7 +1031,8 @@ func NewPromoteCmd(f cmdutil.Factory, streams genericiooptions.IOStreams) *cobra
cmdutil.CheckErr(o.Run())
},
}
cmd.Flags().StringVar(&o.Instance, "candidate", "", "Specify the instance name as the new primary or leader of the cluster, you can get the instance name by running \"kbcli cluster list-instances\"")
cmd.Flags().StringVar(&o.Candidate, "candidate", "", "Specify the instance name as the new primary or leader of the cluster, you can get the instance name by running \"kbcli cluster list-instances\"")
cmd.Flags().StringVar(&o.Instance, "instance", "", "Specify the instance name that will transfer its role to the candidate pod, If not set, the current primary or leader of the cluster will be used.")
cmd.Flags().BoolVar(&o.AutoApprove, "auto-approve", false, "Skip interactive approval before promote the instance")
_ = cmd.MarkFlagRequired("candidate")
o.addCommonFlags(cmd, f)
Expand Down
12 changes: 6 additions & 6 deletions pkg/cmd/cluster/operations_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,17 +359,17 @@ var _ = Describe("operations", func() {
By("validate failed because o.Instance is illegal ")
o.Name = clusterName1
o.Component = testing.ComponentName
o.Instance = fmt.Sprintf("%s-%s-%d", clusterName1, testing.ComponentName, 5)
o.Candidate = fmt.Sprintf("%s-%s-%d", clusterName1, testing.ComponentName, 5)
Expect(o.Validate()).ShouldNot(Succeed())
Expect(testing.ContainExpectStrings(o.Validate().Error(), "not found")).Should(BeTrue())

By("validate failed because o.Instance is already leader and cannot be promoted")
o.Instance = fmt.Sprintf("%s-%s-%d", clusterName1, testing.ComponentName, 0)
o.Candidate = fmt.Sprintf("%s-%s-%d", clusterName1, testing.ComponentName, 0)
Expect(o.Validate()).ShouldNot(Succeed())
Expect(testing.ContainExpectStrings(o.Validate().Error(), "cannot be promoted because it is already the targetRole")).Should(BeTrue())

By("validate failed because o.Instance does not belong to the current component")
o.Instance = fmt.Sprintf("%s-%s-%d", clusterName, testing.ComponentName, 1)
o.Candidate = fmt.Sprintf("%s-%s-%d", clusterName, testing.ComponentName, 1)
o.Name = clusterName1
Expect(o.Validate()).ShouldNot(Succeed())
Expect(testing.ContainExpectStrings(o.Validate().Error(), "does not belong to the current cluster")).Should(BeTrue())
Expand All @@ -394,17 +394,17 @@ var _ = Describe("operations", func() {

By("validate failed because o.Instance is illegal ")
o.Name = clusterNameWithCompDef
o.Instance = fmt.Sprintf("%s-%s-%d", clusterNameWithCompDef, testing.ComponentName, 5)
o.Candidate = fmt.Sprintf("%s-%s-%d", clusterNameWithCompDef, testing.ComponentName, 5)
Expect(o.Validate()).ShouldNot(Succeed())
Expect(testing.ContainExpectStrings(o.Validate().Error(), "not found")).Should(BeTrue())

By("validate failed because o.Instance is already leader and cannot be promoted")
o.Instance = fmt.Sprintf("%s-%s-%d", clusterNameWithCompDef, testing.ComponentName, 0)
o.Candidate = fmt.Sprintf("%s-%s-%d", clusterNameWithCompDef, testing.ComponentName, 0)
Expect(o.Validate()).ShouldNot(Succeed())
Expect(testing.ContainExpectStrings(o.Validate().Error(), "cannot be promoted because it is already the targetRole")).Should(BeTrue())

By("validate failed because o.Instance does not belong to the current cluster")
o.Instance = fmt.Sprintf("%s-%s-%d", clusterName1, testing.ComponentName, 1)
o.Candidate = fmt.Sprintf("%s-%s-%d", clusterName1, testing.ComponentName, 1)
o.Component = testing.ComponentName
o.Name = clusterName
Expect(o.Validate()).ShouldNot(Succeed())
Expand Down
9 changes: 4 additions & 5 deletions pkg/cmd/componentdefinition/describe.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@ func showRoles(roles []kbappsv1.ReplicaRole, out io.Writer) {
}
fmt.Fprintf(out, "Roles:\n")
tbl := printer.NewTablePrinter(out)
tbl.SetHeader("\tNAME", "SERVICE-ABLE", "WRITE-ABLE", "VOTE-ABLE")
tbl.SetHeader("\tNAME", "WITH-QUORUM", "UPDATE-PRIORITY")
for _, role := range roles {
tbl.AddRow("\t"+role.Name, role.Serviceable, role.Writable, role.Votable)
tbl.AddRow("\t"+role.Name, role.ParticipatesInQuorum, role.UpdatePriority)
}
tbl.Print()
fmt.Fprint(out, "\n")
Expand Down Expand Up @@ -228,10 +228,9 @@ func showSystemAccounts(systemAccounts []kbappsv1.SystemAccount, out io.Writer)
}
fmt.Fprintf(out, "System Accounts:\n")
tbl := printer.NewTablePrinter(out)
tbl.SetHeader("\tNAME", "INIT-ACCOUNT", "HAS-SECRET-REF")
tbl.SetHeader("\tNAME", "INIT-ACCOUNT")
for _, sa := range systemAccounts {
hasSecretRef := sa.SecretRef != nil
tbl.AddRow("\t"+sa.Name, sa.InitAccount, hasSecretRef)
tbl.AddRow("\t"+sa.Name, sa.InitAccount)
}
tbl.Print()
fmt.Fprint(out, "\n")
Expand Down
21 changes: 9 additions & 12 deletions pkg/testing/fake.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,22 +314,19 @@ func FakeCompDef() *kbappsv1.ComponentDefinition {
},
Roles: []kbappsv1.ReplicaRole{
{
Name: "leader",
Serviceable: true,
Writable: true,
Votable: true,
Name: "leader",
ParticipatesInQuorum: true,
UpdatePriority: 2,
},
{
Name: "follower",
Serviceable: true,
Writable: false,
Votable: true,
Name: "follower",
ParticipatesInQuorum: true,
UpdatePriority: 1,
},
{
Name: "learner",
Serviceable: false,
Writable: false,
Votable: false,
Name: "learner",
ParticipatesInQuorum: true,
UpdatePriority: 0,
},
},
SystemAccounts: []kbappsv1.SystemAccount{
Expand Down

0 comments on commit f147a40

Please sign in to comment.