Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Recreate bootstrap token if it was cleaned up #11520

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 28 additions & 12 deletions bootstrap/kubeadm/internal/controllers/kubeadmconfig_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ func (r *KubeadmConfigReconciler) reconcile(ctx context.Context, scope *Scope, c
// If the BootstrapToken has been generated for a join but the config owner has no nodeRefs,
// this indicates that the node has not yet joined and the token in the join config has not
// been consumed and it may need a refresh.
return r.refreshBootstrapTokenIfNeeded(ctx, config, cluster)
return r.refreshBootstrapTokenIfNeeded(ctx, config, cluster, scope)
}
if configOwner.IsMachinePool() {
// If the BootstrapToken has been generated and infrastructure is ready but the configOwner is a MachinePool,
Expand Down Expand Up @@ -360,7 +360,7 @@ func (r *KubeadmConfigReconciler) reconcile(ctx context.Context, scope *Scope, c
return r.joinWorker(ctx, scope)
}

func (r *KubeadmConfigReconciler) refreshBootstrapTokenIfNeeded(ctx context.Context, config *bootstrapv1.KubeadmConfig, cluster *clusterv1.Cluster) (ctrl.Result, error) {
func (r *KubeadmConfigReconciler) refreshBootstrapTokenIfNeeded(ctx context.Context, config *bootstrapv1.KubeadmConfig, cluster *clusterv1.Cluster, scope *Scope) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx)
token := config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token

Expand All @@ -371,6 +371,11 @@ func (r *KubeadmConfigReconciler) refreshBootstrapTokenIfNeeded(ctx context.Cont

secret, err := getToken(ctx, remoteClient, token)
if err != nil {
if apierrors.IsNotFound(err) && scope.ConfigOwner.IsMachinePool() {
log.Info("Bootstrap token secret not found, triggering creation of new token")
config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token = ""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems the cleanup of the token here is not needed, since it is added in recreateBootstrapToken

Suggested change
config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token = ""

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is needed IMO. If recreateBootstrapToken fails, we want the token to be unset in the Patch request which always happens at the end of reconciliation (even on failure).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense. It maybe better to move it to the beginning of the recreateBootstrapToken to make it a part of the rotation process, but it is more of a preference (it feels like a part of it).

Otherwise LGTM

/lgtm

return r.recreateBootstrapToken(ctx, config, scope, remoteClient)
}
return ctrl.Result{}, errors.Wrapf(err, "failed to get bootstrap token secret in order to refresh it")
}
log = log.WithValues("Secret", klog.KObj(secret))
Expand Down Expand Up @@ -401,13 +406,33 @@ func (r *KubeadmConfigReconciler) refreshBootstrapTokenIfNeeded(ctx context.Cont
log.Info("Refreshing token until the infrastructure has a chance to consume it", "oldExpiration", secretExpiration, "newExpiration", newExpiration)
err = remoteClient.Update(ctx, secret)
if err != nil {
if apierrors.IsNotFound(err) && scope.ConfigOwner.IsMachinePool() {
log.Info("Bootstrap token secret not found, triggering creation of new token")
config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token = ""
return r.recreateBootstrapToken(ctx, config, scope, remoteClient)
}
return ctrl.Result{}, errors.Wrapf(err, "failed to refresh bootstrap token")
}
return ctrl.Result{
RequeueAfter: r.tokenCheckRefreshOrRotationInterval(),
}, nil
}

func (r *KubeadmConfigReconciler) recreateBootstrapToken(ctx context.Context, config *bootstrapv1.KubeadmConfig, scope *Scope, remoteClient client.Client) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx)

token, err := createToken(ctx, remoteClient, r.TokenTTL)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to create new bootstrap token")
}

config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token = token
log.V(3).Info("Altering JoinConfiguration.Discovery.BootstrapToken.Token")

// Update the bootstrap data
return r.joinWorker(ctx, scope)
}

func (r *KubeadmConfigReconciler) rotateMachinePoolBootstrapToken(ctx context.Context, config *bootstrapv1.KubeadmConfig, cluster *clusterv1.Cluster, scope *Scope) (ctrl.Result, error) {
log := ctrl.LoggerFrom(ctx)
log.V(2).Info("Config is owned by a MachinePool, checking if token should be rotated")
Expand All @@ -423,16 +448,7 @@ func (r *KubeadmConfigReconciler) rotateMachinePoolBootstrapToken(ctx context.Co
}
if shouldRotate {
log.Info("Creating new bootstrap token, the existing one should be rotated")
token, err := createToken(ctx, remoteClient, r.TokenTTL)
if err != nil {
return ctrl.Result{}, errors.Wrapf(err, "failed to create new bootstrap token")
}

config.Spec.JoinConfiguration.Discovery.BootstrapToken.Token = token
log.V(3).Info("Altering JoinConfiguration.Discovery.BootstrapToken.Token")

// update the bootstrap data
return r.joinWorker(ctx, scope)
return r.recreateBootstrapToken(ctx, config, scope, remoteClient)
}
return ctrl.Result{
RequeueAfter: r.tokenCheckRefreshOrRotationInterval(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1441,6 +1441,155 @@ func TestBootstrapTokenRotationMachinePool(t *testing.T) {
g.Expect(foundNew).To(BeTrue())
}

func TestBootstrapTokenRefreshIfTokenSecretCleaned(t *testing.T) {
t.Run("should not recreate the token for Machines", func(t *testing.T) {
g := NewWithT(t)

cluster := builder.Cluster(metav1.NamespaceDefault, "cluster").Build()
cluster.Status.InfrastructureReady = true
conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition)
cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443}

controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine")
initConfig := newControlPlaneInitKubeadmConfig(controlPlaneInitMachine.Namespace, "control-plane-init-config")

addKubeadmConfigToMachine(initConfig, controlPlaneInitMachine)

workerMachine := newWorkerMachineForCluster(cluster)
workerJoinConfig := newWorkerJoinKubeadmConfig(metav1.NamespaceDefault, "worker-join-cfg")
addKubeadmConfigToMachine(workerJoinConfig, workerMachine)
objects := []client.Object{
cluster,
workerMachine,
workerJoinConfig,
}

objects = append(objects, createSecrets(t, cluster, initConfig)...)
myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}).Build()
remoteClient := fake.NewClientBuilder().Build()
k := &KubeadmConfigReconciler{
Client: myclient,
SecretCachingClient: myclient,
KubeadmInitLock: &myInitLocker{},
TokenTTL: DefaultTokenTTL,
ClusterCache: clustercache.NewFakeClusterCache(remoteClient, client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
}
request := ctrl.Request{
NamespacedName: client.ObjectKey{
Namespace: metav1.NamespaceDefault,
Name: "worker-join-cfg",
},
}
result, err := k.Reconcile(ctx, request)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))

cfg, err := getKubeadmConfig(myclient, "worker-join-cfg", metav1.NamespaceDefault)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(cfg.Status.Ready).To(BeTrue())
g.Expect(cfg.Status.DataSecretName).NotTo(BeNil())
g.Expect(cfg.Status.ObservedGeneration).NotTo(BeNil())
g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).ToNot(BeEmpty())
firstToken := cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token

l := &corev1.SecretList{}
g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
g.Expect(l.Items).To(HaveLen(1))

t.Log("Token should not get recreated for single Machine since it will not use the new token if spec.bootstrap.dataSecretName was already set")

// Simulate token cleaner of Kubernetes having deleted the token secret
err = remoteClient.Delete(ctx, &l.Items[0])
g.Expect(err).ToNot(HaveOccurred())

result, err = k.Reconcile(ctx, request)
g.Expect(err).To(HaveOccurred())
g.Expect(err.Error()).To(ContainSubstring("failed to get bootstrap token secret in order to refresh it"))
// New token should not have been created
cfg, err = getKubeadmConfig(myclient, "worker-join-cfg", metav1.NamespaceDefault)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).To(Equal(firstToken))

l = &corev1.SecretList{}
g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
g.Expect(l.Items).To(BeEmpty())
})
t.Run("should recreate the token for MachinePools", func(t *testing.T) {
_ = feature.MutableGates.Set("MachinePool=true")
g := NewWithT(t)

cluster := builder.Cluster(metav1.NamespaceDefault, "cluster").Build()
cluster.Status.InfrastructureReady = true
conditions.MarkTrue(cluster, clusterv1.ControlPlaneInitializedCondition)
cluster.Spec.ControlPlaneEndpoint = clusterv1.APIEndpoint{Host: "100.105.150.1", Port: 6443}

controlPlaneInitMachine := newControlPlaneMachine(cluster, "control-plane-init-machine")
initConfig := newControlPlaneInitKubeadmConfig(controlPlaneInitMachine.Namespace, "control-plane-init-config")

addKubeadmConfigToMachine(initConfig, controlPlaneInitMachine)

workerMachinePool := newWorkerMachinePoolForCluster(cluster)
workerJoinConfig := newWorkerJoinKubeadmConfig(workerMachinePool.Namespace, "workerpool-join-cfg")
addKubeadmConfigToMachinePool(workerJoinConfig, workerMachinePool)
objects := []client.Object{
cluster,
workerMachinePool,
workerJoinConfig,
}

objects = append(objects, createSecrets(t, cluster, initConfig)...)
myclient := fake.NewClientBuilder().WithObjects(objects...).WithStatusSubresource(&bootstrapv1.KubeadmConfig{}, &expv1.MachinePool{}).Build()
remoteClient := fake.NewClientBuilder().Build()
k := &KubeadmConfigReconciler{
Client: myclient,
SecretCachingClient: myclient,
KubeadmInitLock: &myInitLocker{},
TokenTTL: DefaultTokenTTL,
ClusterCache: clustercache.NewFakeClusterCache(remoteClient, client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}),
}
request := ctrl.Request{
NamespacedName: client.ObjectKey{
Namespace: metav1.NamespaceDefault,
Name: "workerpool-join-cfg",
},
}
result, err := k.Reconcile(ctx, request)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))

cfg, err := getKubeadmConfig(myclient, "workerpool-join-cfg", metav1.NamespaceDefault)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(cfg.Status.Ready).To(BeTrue())
g.Expect(cfg.Status.DataSecretName).NotTo(BeNil())
g.Expect(cfg.Status.ObservedGeneration).NotTo(BeNil())
g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).ToNot(BeEmpty())
firstToken := cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token

l := &corev1.SecretList{}
g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
g.Expect(l.Items).To(HaveLen(1))

t.Log("Ensure that the token gets recreated if it was cleaned up by Kubernetes (e.g. on expiry)")

// Simulate token cleaner of Kubernetes having deleted the token secret
err = remoteClient.Delete(ctx, &l.Items[0])
g.Expect(err).ToNot(HaveOccurred())

result, err = k.Reconcile(ctx, request)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(result.RequeueAfter).To(Equal(k.TokenTTL / 3))
// New token should have been created
cfg, err = getKubeadmConfig(myclient, "workerpool-join-cfg", metav1.NamespaceDefault)
g.Expect(err).ToNot(HaveOccurred())
g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).ToNot(BeEmpty())
g.Expect(cfg.Spec.JoinConfiguration.Discovery.BootstrapToken.Token).ToNot(Equal(firstToken))

l = &corev1.SecretList{}
g.Expect(remoteClient.List(ctx, l, client.ListOption(client.InNamespace(metav1.NamespaceSystem)))).To(Succeed())
g.Expect(l.Items).To(HaveLen(1))
})
}

// Ensure the discovery portion of the JoinConfiguration gets generated correctly.
func TestKubeadmConfigReconciler_Reconcile_DiscoveryReconcileBehaviors(t *testing.T) {
caHash := []string{"...."}
Expand Down
Loading