Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Update Cloudprovider interface with driftReason #4397

Merged
merged 1 commit into from
Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ require (
github.com/PuerkitoBio/goquery v1.8.1
github.com/avast/retry-go v3.0.0+incompatible
github.com/aws/aws-sdk-go v1.44.294
github.com/aws/karpenter-core v0.29.2-0.20230803235302-95bd9f61a18b
github.com/aws/karpenter-core v0.29.2-0.20230809005636-36c54adb96d7
github.com/go-playground/validator/v10 v10.13.0
github.com/imdario/mergo v0.3.16
github.com/mitchellh/hashstructure/v2 v2.0.2
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ github.com/avast/retry-go v3.0.0+incompatible h1:4SOWQ7Qs+oroOTQOYnAHqelpCO0biHS
github.com/avast/retry-go v3.0.0+incompatible/go.mod h1:XtSnn+n/sHqQIpZ10K1qAevBhOOCWBLXXy3hyiqqBrY=
github.com/aws/aws-sdk-go v1.44.294 h1:3x7GaEth+pDU9HwFcAU0awZlEix5CEdyIZvV08SlHa8=
github.com/aws/aws-sdk-go v1.44.294/go.mod h1:aVsgQcEevwlmQ7qHE9I3h+dtQgpqhFB+i8Phjh7fkwI=
github.com/aws/karpenter-core v0.29.2-0.20230803235302-95bd9f61a18b h1:88YeEA65jQNCj4/AdH0qixJeuGuIOi5QxOzsRCXsJQA=
github.com/aws/karpenter-core v0.29.2-0.20230803235302-95bd9f61a18b/go.mod h1:+C8X0N378fQ/+YmopvRHflj2JFrVP8sPs9xL7v4A6eM=
github.com/aws/karpenter-core v0.29.2-0.20230809005636-36c54adb96d7 h1:912c/OMnMEke0HIevsDHAIMwbsgrXUyJg3YmP3x5iTA=
github.com/aws/karpenter-core v0.29.2-0.20230809005636-36c54adb96d7/go.mod h1:+C8X0N378fQ/+YmopvRHflj2JFrVP8sPs9xL7v4A6eM=
github.com/benbjohnson/clock v1.1.0 h1:Q92kusRqC1XV2MjkWETPvjJVqKetz1OzxZB7mHJLju8=
github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
Expand Down
14 changes: 7 additions & 7 deletions pkg/cloudprovider/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -189,24 +189,24 @@ func (c *CloudProvider) Delete(ctx context.Context, machine *v1alpha5.Machine) e
return c.instanceProvider.Delete(ctx, id)
}

func (c *CloudProvider) IsMachineDrifted(ctx context.Context, machine *v1alpha5.Machine) (bool, error) {
func (c *CloudProvider) IsMachineDrifted(ctx context.Context, machine *v1alpha5.Machine) (cloudprovider.DriftReason, error) {
// Not needed when GetInstanceTypes removes provisioner dependency
provisioner := &v1alpha5.Provisioner{}
if err := c.kubeClient.Get(ctx, types.NamespacedName{Name: machine.Labels[v1alpha5.ProvisionerNameLabelKey]}, provisioner); err != nil {
return false, client.IgnoreNotFound(fmt.Errorf("getting provisioner, %w", err))
return "", client.IgnoreNotFound(fmt.Errorf("getting provisioner, %w", err))
engedaam marked this conversation as resolved.
Show resolved Hide resolved
}
if provisioner.Spec.ProviderRef == nil {
return false, nil
return "", nil
}
nodeTemplate, err := c.resolveNodeTemplate(ctx, nil, provisioner.Spec.ProviderRef)
if err != nil {
return false, client.IgnoreNotFound(fmt.Errorf("resolving node template, %w", err))
return "", client.IgnoreNotFound(fmt.Errorf("resolving node template, %w", err))
}
drifted, err := c.isNodeTemplateDrifted(ctx, machine, provisioner, nodeTemplate)
driftReason, err := c.isNodeTemplateDrifted(ctx, machine, provisioner, nodeTemplate)
if err != nil {
return false, err
return "", err
}
return drifted, nil
return driftReason, nil
}

// Name returns the CloudProvider implementation name.
Expand Down
73 changes: 47 additions & 26 deletions pkg/cloudprovider/drift.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,90 +30,111 @@ import (
"github.com/aws/karpenter/pkg/utils"
)

func (c *CloudProvider) isNodeTemplateDrifted(ctx context.Context, machine *v1alpha5.Machine, provisioner *v1alpha5.Provisioner, nodeTemplate *v1alpha1.AWSNodeTemplate) (bool, error) {
const (
AMIDrift cloudprovider.DriftReason = "AMIDrift"
SubnetDrift cloudprovider.DriftReason = "SubnetDrift"
SecurityGroupDrift cloudprovider.DriftReason = "SecurityGroupDrift"
NodeTemplateStaticDrift cloudprovider.DriftReason = "NodeTemplateStaticDrift"
)

func (c *CloudProvider) isNodeTemplateDrifted(ctx context.Context, machine *v1alpha5.Machine, provisioner *v1alpha5.Provisioner, nodeTemplate *v1alpha1.AWSNodeTemplate) (cloudprovider.DriftReason, error) {
instance, err := c.getInstance(ctx, machine.Status.ProviderID)
if err != nil {
return false, err
return "", err
}
amiDrifted, err := c.isAMIDrifted(ctx, machine, provisioner, instance, nodeTemplate)
if err != nil {
return false, fmt.Errorf("calculating ami drift, %w", err)
return "", fmt.Errorf("calculating ami drift, %w", err)
}
securitygroupDrifted, err := c.areSecurityGroupsDrifted(instance, nodeTemplate)
if err != nil {
return false, fmt.Errorf("calculating securitygroup drift, %w", err)
return "", fmt.Errorf("calculating securitygroup drift, %w", err)
}
subnetDrifted, err := c.isSubnetDrifted(instance, nodeTemplate)
if err != nil {
return false, fmt.Errorf("calculating subnet drift, %w", err)
return "", fmt.Errorf("calculating subnet drift, %w", err)
}

return amiDrifted || securitygroupDrifted || subnetDrifted || c.areStaticFieldsDrifted(machine, nodeTemplate), nil
drifted := lo.FindOrElse([]cloudprovider.DriftReason{amiDrifted, securitygroupDrifted, subnetDrifted, c.areStaticFieldsDrifted(machine, nodeTemplate)}, "", func(i cloudprovider.DriftReason) bool {
engedaam marked this conversation as resolved.
Show resolved Hide resolved
return string(i) != ""
})
return drifted, nil
}

func (c *CloudProvider) isAMIDrifted(ctx context.Context, machine *v1alpha5.Machine, provisioner *v1alpha5.Provisioner,
instance *instance.Instance, nodeTemplate *v1alpha1.AWSNodeTemplate) (bool, error) {
instance *instance.Instance, nodeTemplate *v1alpha1.AWSNodeTemplate) (cloudprovider.DriftReason, error) {
instanceTypes, err := c.GetInstanceTypes(ctx, provisioner)
if err != nil {
return false, fmt.Errorf("getting instanceTypes, %w", err)
return "", fmt.Errorf("getting instanceTypes, %w", err)
}
nodeInstanceType, found := lo.Find(instanceTypes, func(instType *cloudprovider.InstanceType) bool {
return instType.Name == machine.Labels[v1.LabelInstanceTypeStable]
})
if !found {
return false, fmt.Errorf(`finding node instance type "%s"`, machine.Labels[v1.LabelInstanceTypeStable])
return "", fmt.Errorf(`finding node instance type "%s"`, machine.Labels[v1.LabelInstanceTypeStable])
}
if nodeTemplate.Spec.LaunchTemplateName != nil {
return false, nil
return "", nil
}
amis, err := c.amiProvider.Get(ctx, nodeTemplate, &amifamily.Options{})
if err != nil {
return false, fmt.Errorf("getting amis, %w", err)
return "", fmt.Errorf("getting amis, %w", err)
}
if len(amis) == 0 {
return false, fmt.Errorf("no amis exist given constraints")
return "", fmt.Errorf("no amis exist given constraints")
}
mappedAMIs := amifamily.MapInstanceTypes(amis, []*cloudprovider.InstanceType{nodeInstanceType})
if len(mappedAMIs) == 0 {
return false, fmt.Errorf("no instance types satisfy requirements of amis %v,", amis)
return "", fmt.Errorf("no instance types satisfy requirements of amis %v,", amis)
}
if !lo.Contains(lo.Keys(mappedAMIs), instance.ImageID) {
return AMIDrift, nil
}
return !lo.Contains(lo.Keys(mappedAMIs), instance.ImageID), nil
return "", nil
}

func (c *CloudProvider) isSubnetDrifted(instance *instance.Instance, nodeTemplate *v1alpha1.AWSNodeTemplate) (bool, error) {
func (c *CloudProvider) isSubnetDrifted(instance *instance.Instance, nodeTemplate *v1alpha1.AWSNodeTemplate) (cloudprovider.DriftReason, error) {
// If the node template status does not have subnets, wait for the subnets to be populated before continuing
if nodeTemplate.Status.Subnets == nil {
return false, fmt.Errorf("AWSNodeTemplate has no subnets")
return "", fmt.Errorf("AWSNodeTemplate has no subnets")
}
_, found := lo.Find(nodeTemplate.Status.Subnets, func(subnet v1alpha1.Subnet) bool {
return subnet.ID == instance.SubnetID
})
return !found, nil
if !found {
return SubnetDrift, nil
}
return "", nil
}

// Checks if the security groups are drifted, by comparing the AWSNodeTemplate.Status.SecurityGroups
// to the ec2 instance security groups
func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, nodeTemplate *v1alpha1.AWSNodeTemplate) (bool, error) {
func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, nodeTemplate *v1alpha1.AWSNodeTemplate) (cloudprovider.DriftReason, error) {
// nodeTemplate.Spec.SecurityGroupSelector can be nil if the user is using a launchTemplateName to define SecurityGroups
// Karpenter will not drift on changes to securitygroup in the launchTemplateName
if nodeTemplate.Spec.LaunchTemplateName != nil {
return false, nil
return "", nil
}
securityGroupIds := sets.New(lo.Map(nodeTemplate.Status.SecurityGroups, func(sg v1alpha1.SecurityGroup, _ int) string { return sg.ID })...)
if len(securityGroupIds) == 0 {
return false, fmt.Errorf("no security groups exist in the AWSNodeTemplate Status")
return "", fmt.Errorf("no security groups exist in the AWSNodeTemplate Status")
}

if !securityGroupIds.Equal(sets.New(ec2Instance.SecurityGroupIDs...)) {
return SecurityGroupDrift, nil
}
return !securityGroupIds.Equal(sets.New(ec2Instance.SecurityGroupIDs...)), nil
return "", nil
}

func (c *CloudProvider) areStaticFieldsDrifted(machine *v1alpha5.Machine, nodeTemplate *v1alpha1.AWSNodeTemplate) bool {
func (c *CloudProvider) areStaticFieldsDrifted(machine *v1alpha5.Machine, nodeTemplate *v1alpha1.AWSNodeTemplate) cloudprovider.DriftReason {
nodeTemplateHash, foundHashNodeTemplate := nodeTemplate.ObjectMeta.Annotations[v1alpha1.AnnotationNodeTemplateHash]
machineHash, foundHashMachine := machine.ObjectMeta.Annotations[v1alpha1.AnnotationNodeTemplateHash]
if !foundHashNodeTemplate || !foundHashMachine {
return false
return ""
}

return nodeTemplateHash != machineHash
if nodeTemplateHash != machineHash {
return NodeTemplateStaticDrift
}
return ""
}

func (c *CloudProvider) getInstance(ctx context.Context, providerID string) (*instance.Instance, error) {
Expand Down
34 changes: 17 additions & 17 deletions pkg/cloudprovider/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,33 +307,33 @@ var _ = Describe("CloudProvider", func() {
ExpectDeleted(ctx, env.Client, nodeTemplate)
drifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(drifted).To(BeFalse())
Expect(drifted).To(BeEmpty())
})
It("should return false if providerRef is not defined", func() {
provisioner.Spec.ProviderRef = nil
ExpectApplied(ctx, env.Client, provisioner)
drifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(drifted).To(BeFalse())
Expect(drifted).To(BeEmpty())
})
It("should not fail if provisioner does not exist", func() {
ExpectDeleted(ctx, env.Client, provisioner)
drifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(drifted).To(BeFalse())
Expect(drifted).To(BeEmpty())
})
It("should return drifted if the AMI is not valid", func() {
// Instance is a reference to what we return in the GetInstances call
instance.ImageId = aws.String(fake.ImageID())
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(BeTrue())
Expect(isDrifted).To(Equal(cloudprovider.AMIDrift))
})
It("should return drifted if the subnet is not valid", func() {
instance.SubnetId = aws.String(fake.SubnetID())
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(BeTrue())
Expect(isDrifted).To(Equal(cloudprovider.SubnetDrift))
})
It("should return an error if AWSNodeTemplate subnets are empty", func() {
nodeTemplate.Status.Subnets = []v1alpha1.Subnet{}
Expand All @@ -344,7 +344,7 @@ var _ = Describe("CloudProvider", func() {
It("should not return drifted if the machine is valid", func() {
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(BeFalse())
Expect(isDrifted).To(BeEmpty())
})
It("should return an error if the AWSNodeTemplate securitygroup are empty", func() {
nodeTemplate.Status.SecurityGroups = []v1alpha1.SecurityGroup{}
Expand All @@ -359,14 +359,14 @@ var _ = Describe("CloudProvider", func() {
instance.SecurityGroups = []*ec2.GroupIdentifier{{GroupId: aws.String(fake.SecurityGroupID())}}
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(BeTrue())
Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift))
})
It("should return drifted if there are more instance securitygroups are present than AWSNodeTemplate Status", func() {
// Instance is a reference to what we return in the GetInstances call
instance.SecurityGroups = []*ec2.GroupIdentifier{{GroupId: aws.String(fake.SecurityGroupID())}, {GroupId: aws.String(validSecurityGroup)}}
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(BeTrue())
Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift))
})
It("should return drifted if more AWSNodeTemplate securitygroups are present than instance securitygroups", func() {
nodeTemplate.Status.SecurityGroups = []v1alpha1.SecurityGroup{
Expand All @@ -382,20 +382,20 @@ var _ = Describe("CloudProvider", func() {
ExpectApplied(ctx, env.Client, nodeTemplate)
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(BeTrue())
Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift))
})
It("should not return drifted if launchTemplateName is defined", func() {
nodeTemplate.Spec.LaunchTemplateName = aws.String("validLaunchTemplateName")
nodeTemplate.Spec.SecurityGroupSelector = nil
nodeTemplate.Status.SecurityGroups = nil
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(BeFalse())
Expect(isDrifted).To(BeEmpty())
})
It("should not return drifted if the securitygroups match", func() {
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(BeFalse())
Expect(isDrifted).To(BeEmpty())
})
It("should error if the machine doesn't have the instance-type label", func() {
machine.Labels = map[string]string{
Expand All @@ -408,7 +408,7 @@ var _ = Describe("CloudProvider", func() {
machine.Status = v1alpha5.MachineStatus{}
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).To(HaveOccurred())
Expect(isDrifted).To(BeFalse())
Expect(isDrifted).To(BeEmpty())
})
It("should error drift if the underlying machine does not exist", func() {
awsEnv.EC2API.DescribeInstancesBehavior.Output.Set(&ec2.DescribeInstancesOutput{
Expand All @@ -431,7 +431,7 @@ var _ = Describe("CloudProvider", func() {
ExpectApplied(ctx, env.Client, provisioner, nodeTemplate)
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeFalse())
Expect(isDrifted).To(BeEmpty())

updatedAWSNodeTemplate := test.AWSNodeTemplate(*nodeTemplate.Spec.DeepCopy(), awsnodetemplatespec)
updatedAWSNodeTemplate.ObjectMeta = nodeTemplate.ObjectMeta
Expand All @@ -441,7 +441,7 @@ var _ = Describe("CloudProvider", func() {
ExpectApplied(ctx, env.Client, updatedAWSNodeTemplate)
isDrifted, err = cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeTrue())
Expect(isDrifted).To(Equal(cloudprovider.NodeTemplateStaticDrift))
},
Entry("InstanceProfile Drift", v1alpha1.AWSNodeTemplateSpec{AWS: v1alpha1.AWS{InstanceProfile: aws.String("profile-2")}}),
Entry("UserData Drift", v1alpha1.AWSNodeTemplateSpec{UserData: aws.String("userdata-test-2")}),
Expand All @@ -457,7 +457,7 @@ var _ = Describe("CloudProvider", func() {
ExpectApplied(ctx, env.Client, provisioner, nodeTemplate)
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeFalse())
Expect(isDrifted).To(BeEmpty())

updatedAWSNodeTemplate := test.AWSNodeTemplate(*nodeTemplate.Spec.DeepCopy(), awsnodetemplatespec)
updatedAWSNodeTemplate.ObjectMeta = nodeTemplate.ObjectMeta
Expand All @@ -467,7 +467,7 @@ var _ = Describe("CloudProvider", func() {
ExpectApplied(ctx, env.Client, updatedAWSNodeTemplate)
isDrifted, err = cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeFalse())
Expect(isDrifted).To(BeEmpty())
},
Entry("AMISelector Drift", v1alpha1.AWSNodeTemplateSpec{AMISelector: map[string]string{"aws::ids": validAMI}}),
Entry("SubnetSelector Drift", v1alpha1.AWSNodeTemplateSpec{AWS: v1alpha1.AWS{SubnetSelector: map[string]string{"aws-ids": "subnet-test1"}}}),
Expand All @@ -481,7 +481,7 @@ var _ = Describe("CloudProvider", func() {
ExpectApplied(ctx, env.Client, provisioner, nodeTemplate)
isDrifted, err := cloudProvider.IsMachineDrifted(ctx, machine)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeFalse())
Expect(isDrifted).To(BeEmpty())
})
})
})
Expand Down
6 changes: 3 additions & 3 deletions pkg/fake/cloudprovider.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,14 +57,14 @@ func (c *CloudProvider) GetInstanceTypes(_ context.Context, _ *v1alpha5.Provisio
}, nil
}

func (c *CloudProvider) IsMachineDrifted(_ context.Context, machine *v1alpha5.Machine) (bool, error) {
func (c *CloudProvider) IsMachineDrifted(_ context.Context, machine *v1alpha5.Machine) (corecloudprovider.DriftReason, error) {
nodeAMI := machine.Labels[v1alpha1.LabelInstanceAMIID]
for _, ami := range c.ValidAMIs {
if nodeAMI == ami {
return false, nil
return "", nil
engedaam marked this conversation as resolved.
Show resolved Hide resolved
}
}
return true, nil
return "drifted", nil
}

func (c *CloudProvider) Get(context.Context, string) (*v1alpha5.Machine, error) {
Expand Down