Skip to content

Commit

Permalink
chore: Wait for instance termination before deleting nodeclaim
Browse files Browse the repository at this point in the history
  • Loading branch information
jigisha620 committed Apr 5, 2024
1 parent 7713f4f commit af11d0d
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 7 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ require (
k8s.io/utils v0.0.0-20240102154912-e7106e64919e
knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd
sigs.k8s.io/controller-runtime v0.17.2
sigs.k8s.io/karpenter v0.35.1-0.20240404200702-545d88a836d4
sigs.k8s.io/karpenter v0.35.1-0.20240405012730-3e1180609c13
sigs.k8s.io/yaml v1.4.0
)

Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -759,8 +759,8 @@ sigs.k8s.io/controller-runtime v0.17.2 h1:FwHwD1CTUemg0pW2otk7/U5/i5m2ymzvOXdbeG
sigs.k8s.io/controller-runtime v0.17.2/go.mod h1:+MngTvIQQQhfXtwfdGw/UOQ/aIaqsYywfCINOtwMO/s=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/karpenter v0.35.1-0.20240404200702-545d88a836d4 h1:eb3ubw9tFiV62Gq1Ci5r5BCJ8OlKu2Mqf7h3tJIRe0U=
sigs.k8s.io/karpenter v0.35.1-0.20240404200702-545d88a836d4/go.mod h1:fieFojxOec/l0tDmFT7R+g/Y+SGQbL9VlcYO8xb3sLo=
sigs.k8s.io/karpenter v0.35.1-0.20240405012730-3e1180609c13 h1:zqAkieh8q+3mCS8DgRB4/x6JrzytSXNZG2mDfLn7il4=
sigs.k8s.io/karpenter v0.35.1-0.20240405012730-3e1180609c13/go.mod h1:fieFojxOec/l0tDmFT7R+g/Y+SGQbL9VlcYO8xb3sLo=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
Expand Down
4 changes: 2 additions & 2 deletions pkg/controllers/nodeclaim/garbagecollection/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,8 @@ func (c *Controller) Reconcile(ctx context.Context, _ reconcile.Request) (reconc

func (c *Controller) garbageCollect(ctx context.Context, nodeClaim *v1beta1.NodeClaim, nodeList *v1.NodeList) error {
ctx = logging.WithLogger(ctx, logging.FromContext(ctx).With("provider-id", nodeClaim.Status.ProviderID))
if err := c.cloudProvider.Delete(ctx, nodeClaim); err != nil {
return cloudprovider.IgnoreNodeClaimNotFoundError(err)
if err := c.cloudProvider.Delete(ctx, nodeClaim); cloudprovider.IgnoreNodeClaimNotFoundError(err) != nil && cloudprovider.IgnoreRetryableError(err) != nil {
return err
}
logging.FromContext(ctx).Debugf("garbage collected cloudprovider instance")

Expand Down
10 changes: 8 additions & 2 deletions pkg/providers/instance/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,10 @@ func (p *Provider) List(ctx context.Context) ([]*Instance, error) {
}

func (p *Provider) Delete(ctx context.Context, id string) error {
if _, err := p.ec2Batcher.TerminateInstances(ctx, &ec2.TerminateInstancesInput{
response, err := p.ec2Batcher.TerminateInstances(ctx, &ec2.TerminateInstancesInput{
InstanceIds: []*string{aws.String(id)},
}); err != nil {
})
if err != nil {
if awserrors.IsNotFound(err) {
return cloudprovider.NewNodeClaimNotFoundError(fmt.Errorf("instance already terminated"))
}
Expand All @@ -166,6 +167,11 @@ func (p *Provider) Delete(ctx context.Context, id string) error {
}
return fmt.Errorf("terminating instance, %w", err)
}
for _, instance := range response.TerminatingInstances {
if instance.CurrentState.Name != aws.String(ec2.InstanceStateNameTerminated) {
return cloudprovider.NewRetryableError(fmt.Errorf("terminating instance '%s'", aws.StringValue(instance.CurrentState.Name)))
}
}
return nil
}

Expand Down
41 changes: 41 additions & 0 deletions pkg/providers/instance/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,4 +194,45 @@ var _ = Describe("InstanceProvider", func() {
retrievedIDs := sets.New[string](lo.Map(instances, func(i *instance.Instance, _ int) string { return i.ID })...)
Expect(ids.Equal(retrievedIDs)).To(BeTrue())
})
It("should return retryable error when instance state is not terminated", func() {
instanceID := fake.InstanceID()
instance := &ec2.Instance{
State: &ec2.InstanceState{
Name: aws.String(ec2.InstanceStateNameRunning),
},
Tags: []*ec2.Tag{
{
Key: aws.String(fmt.Sprintf("kubernetes.io/cluster/%s", options.FromContext(ctx).ClusterName)),
Value: aws.String("owned"),
},
{
Key: aws.String(corev1beta1.NodePoolLabelKey),
Value: aws.String("default"),
},
{
Key: aws.String(v1beta1.LabelNodeClass),
Value: aws.String("default"),
},
{
Key: aws.String(corev1beta1.ManagedByAnnotationKey),
Value: aws.String(options.FromContext(ctx).ClusterName),
},
},
PrivateDnsName: aws.String(fake.PrivateDNSName()),
Placement: &ec2.Placement{
AvailabilityZone: aws.String(fake.DefaultRegion),
},
// Launch time was 1m ago
LaunchTime: aws.Time(time.Now().Add(-time.Minute)),
InstanceId: aws.String(instanceID),
InstanceType: aws.String("m5.large"),
}
awsEnv.EC2API.Instances.Store(
instanceID,
instance,
)

err := awsEnv.InstanceProvider.Delete(ctx, instanceID)
Expect(corecloudprovider.IsRetryableError(err)).To(BeTrue())
})
})

0 comments on commit af11d0d

Please sign in to comment.