Skip to content

Commit

Permalink
Use Security Groups status for node launch
Browse files Browse the repository at this point in the history
  • Loading branch information
engedaam committed Apr 24, 2024
1 parent b7745bf commit 2cab316
Show file tree
Hide file tree
Showing 10 changed files with 222 additions and 23 deletions.
4 changes: 4 additions & 0 deletions pkg/apis/crds/karpenter.k8s.aws_ec2nodeclasses.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,10 @@ spec:
description: InstanceProfile contains the resolved instance profile
for the role
type: string
observedGeneration:
description: The generation observed by the nodeclass controller.
format: int64
type: integer
securityGroups:
description: |-
SecurityGroups contains the current Security Groups values that are available to the
Expand Down
3 changes: 3 additions & 0 deletions pkg/apis/v1beta1/ec2nodeclass_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,4 +68,7 @@ type EC2NodeClassStatus struct {
// InstanceProfile contains the resolved instance profile for the role
// +optional
InstanceProfile string `json:"instanceProfile,omitempty"`
// The generation observed by the nodeclass controller.
// +optional
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
}
11 changes: 5 additions & 6 deletions pkg/cloudprovider/drift.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (c *CloudProvider) isNodeClassDrifted(ctx context.Context, nodeClaim *corev
if err != nil {
return "", fmt.Errorf("calculating ami drift, %w", err)
}
securitygroupDrifted, err := c.areSecurityGroupsDrifted(ctx, instance, nodeClass)
securitygroupDrifted, err := c.areSecurityGroupsDrifted(instance, nodeClass)
if err != nil {
return "", fmt.Errorf("calculating securitygroup drift, %w", err)
}
Expand Down Expand Up @@ -118,12 +118,11 @@ func (c *CloudProvider) isSubnetDrifted(ctx context.Context, instance *instance.

// Checks if the security groups are drifted, by comparing the security groups resolved in the EC2NodeClass status
// to the ec2 instance security groups
func (c *CloudProvider) areSecurityGroupsDrifted(ctx context.Context, ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) {
securitygroup, err := c.securityGroupProvider.List(ctx, nodeClass)
if err != nil {
return "", err
func (c *CloudProvider) areSecurityGroupsDrifted(ec2Instance *instance.Instance, nodeClass *v1beta1.EC2NodeClass) (cloudprovider.DriftReason, error) {
if nodeClass.Generation != nodeClass.Status.ObservedGeneration {
return "", fmt.Errorf("waiting until nodeclass is updated")
}
securityGroupIds := sets.New(lo.Map(securitygroup, func(sg *ec2.SecurityGroup, _ int) string { return aws.StringValue(sg.GroupId) })...)
securityGroupIds := sets.New(lo.Map(nodeClass.Status.SecurityGroups, func(sg v1beta1.SecurityGroup, _ int) string { return sg.ID })...)
if len(securityGroupIds) == 0 {
return "", fmt.Errorf("no security groups are discovered")
}
Expand Down
41 changes: 37 additions & 4 deletions pkg/cloudprovider/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,15 +39,19 @@ import (
"github.com/aws/karpenter-provider-aws/pkg/apis"
"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"
"github.com/aws/karpenter-provider-aws/pkg/cloudprovider"
"github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/status"
"github.com/aws/karpenter-provider-aws/pkg/fake"
"github.com/aws/karpenter-provider-aws/pkg/operator/options"
"github.com/aws/karpenter-provider-aws/pkg/test"

"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
corecloudproivder "sigs.k8s.io/karpenter/pkg/cloudprovider"
"sigs.k8s.io/karpenter/pkg/controllers/provisioning"
"sigs.k8s.io/karpenter/pkg/controllers/state"
"sigs.k8s.io/karpenter/pkg/events"
"sigs.k8s.io/karpenter/pkg/operator/controller"
coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
"sigs.k8s.io/karpenter/pkg/operator/scheme"
coretest "sigs.k8s.io/karpenter/pkg/test"
Expand All @@ -67,6 +71,7 @@ var cloudProvider *cloudprovider.CloudProvider
var cluster *state.Cluster
var fakeClock *clock.FakeClock
var recorder events.Recorder
var statusController controller.Controller

func TestAWS(t *testing.T) {
ctx = TestContextWithLogger(t)
Expand All @@ -86,6 +91,14 @@ var _ = BeforeSuite(func() {
env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider)
cluster = state.NewCluster(fakeClock, env.Client, cloudProvider)
prov = provisioning.NewProvisioner(env.Client, recorder, cloudProvider, cluster)
statusController = status.NewController(
env.Client,
awsEnv.SubnetProvider,
awsEnv.SecurityGroupProvider,
awsEnv.AMIProvider,
awsEnv.InstanceProfileProvider,
awsEnv.LaunchTemplateProvider,
)
})

var _ = AfterSuite(func() {
Expand Down Expand Up @@ -565,8 +578,10 @@ var _ = Describe("CloudProvider", func() {
awsEnv.EC2API.DescribeSubnetsOutput.Set(&ec2.DescribeSubnetsOutput{
Subnets: []*ec2.Subnet{
{
SubnetId: aws.String(validSubnet1),
AvailabilityZone: aws.String("zone-1"),
SubnetId: aws.String(validSubnet1),
AvailabilityZone: aws.String("zone-1"),
AvailableIpAddressCount: aws.Int64(100),
MapPublicIpOnLaunch: aws.Bool(false),
Tags: []*ec2.Tag{
{
Key: aws.String("sn-key-1"),
Expand All @@ -575,8 +590,10 @@ var _ = Describe("CloudProvider", func() {
},
},
{
SubnetId: aws.String(validSubnet2),
AvailabilityZone: aws.String("zone-2"),
SubnetId: aws.String(validSubnet2),
AvailabilityZone: aws.String("zone-2"),
AvailableIpAddressCount: aws.Int64(100),
MapPublicIpOnLaunch: aws.Bool(false),
Tags: []*ec2.Tag{
{
Key: aws.String("sn-key-2"),
Expand All @@ -587,6 +604,7 @@ var _ = Describe("CloudProvider", func() {
},
})
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
instanceTypes, err := cloudProvider.GetInstanceTypes(ctx, nodePool)
Expect(err).ToNot(HaveOccurred())
selectedInstanceType = instanceTypes[0]
Expand Down Expand Up @@ -621,6 +639,8 @@ var _ = Describe("CloudProvider", func() {
nodeClaim.Labels = lo.Assign(nodeClaim.Labels, map[string]string{v1.LabelInstanceTypeStable: selectedInstanceType.Name})
})
It("should not fail if NodeClass does not exist", func() {
controllerutil.RemoveFinalizer(nodeClass, v1beta1.TerminationFinalizer)
ExpectApplied(ctx, env.Client, nodeClass)
ExpectDeleted(ctx, env.Client, nodeClass)
drifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expand Down Expand Up @@ -674,6 +694,8 @@ var _ = Describe("CloudProvider", func() {
awsEnv.EC2API.DescribeSecurityGroupsOutput.Set(&ec2.DescribeSecurityGroupsOutput{SecurityGroups: []*ec2.SecurityGroup{}})
// Instance is a reference to what we return in the GetInstances call
instance.SecurityGroups = []*ec2.GroupIdentifier{{GroupId: aws.String(fake.SecurityGroupID())}}
awsEnv.SecurityGroupCache.Flush()
ExpectReconcileFailed(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
_, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).To(HaveOccurred())
})
Expand Down Expand Up @@ -704,6 +726,8 @@ var _ = Describe("CloudProvider", func() {
},
},
})
awsEnv.SecurityGroupCache.Flush()
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(Equal(cloudprovider.SecurityGroupDrift))
Expand Down Expand Up @@ -734,6 +758,7 @@ var _ = Describe("CloudProvider", func() {
It("should return drifted if the AMI no longer matches the existing NodeClaims instance type", func() {
nodeClass.Spec.AMISelectorTerms = []v1beta1.AMISelectorTerm{{ID: amdAMIID}}
ExpectApplied(ctx, env.Client, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).ToNot(HaveOccurred())
Expect(isDrifted).To(Equal(cloudprovider.AMIDrift))
Expand Down Expand Up @@ -784,6 +809,7 @@ var _ = Describe("CloudProvider", func() {
DescribeTable("should return drifted if a statically drifted EC2NodeClass.Spec field is updated",
func(changes v1beta1.EC2NodeClass) {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -792,6 +818,7 @@ var _ = Describe("CloudProvider", func() {
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})

ExpectApplied(ctx, env.Client, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err = cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(Equal(cloudprovider.NodeClassDrift))
Expand Down Expand Up @@ -831,6 +858,7 @@ var _ = Describe("CloudProvider", func() {
DescribeTable("should not return drifted if dynamic fields are updated",
func(changes v1beta1.EC2NodeClass) {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -839,6 +867,7 @@ var _ = Describe("CloudProvider", func() {
nodeClass.Annotations = lo.Assign(nodeClass.Annotations, map[string]string{v1beta1.AnnotationEC2NodeClassHash: nodeClass.Hash()})

ExpectApplied(ctx, env.Client, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err = cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -855,6 +884,7 @@ var _ = Describe("CloudProvider", func() {
"Test Key": "Test Value",
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -869,6 +899,7 @@ var _ = Describe("CloudProvider", func() {
v1beta1.AnnotationEC2NodeClassHashVersion: "test-hash-version-2",
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -886,6 +917,7 @@ var _ = Describe("CloudProvider", func() {
"Test Key": "Test Value",
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand All @@ -903,6 +935,7 @@ var _ = Describe("CloudProvider", func() {
"Test Key": "Test Value",
}
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
isDrifted, err := cloudProvider.IsDrifted(ctx, nodeClaim)
Expect(err).NotTo(HaveOccurred())
Expect(isDrifted).To(BeEmpty())
Expand Down
1 change: 1 addition & 0 deletions pkg/controllers/nodeclass/status/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeCl
}

if !equality.Semantic.DeepEqual(stored, nodeClass) {
nodeClass.Status.ObservedGeneration = nodeClass.Generation
if err := c.kubeClient.Status().Patch(ctx, nodeClass, client.MergeFrom(stored)); err != nil {
errs = multierr.Append(errs, client.IgnoreNotFound(err))
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/controllers/nodeclass/status/securitygroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,5 +50,5 @@ func (sg *SecurityGroup) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2No
Name: *securityGroup.GroupName,
}
})
return reconcile.Result{RequeueAfter: 5 * time.Minute}, nil
return reconcile.Result{RequeueAfter: time.Minute}, nil
}
14 changes: 14 additions & 0 deletions pkg/providers/instance/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,19 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1"
corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider"
"sigs.k8s.io/karpenter/pkg/events"
"sigs.k8s.io/karpenter/pkg/operator/controller"
coreoptions "sigs.k8s.io/karpenter/pkg/operator/options"
"sigs.k8s.io/karpenter/pkg/operator/scheme"
coretest "sigs.k8s.io/karpenter/pkg/test"

"github.com/aws/karpenter-provider-aws/pkg/apis"
"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"
"github.com/aws/karpenter-provider-aws/pkg/cloudprovider"
"github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/status"
"github.com/aws/karpenter-provider-aws/pkg/fake"
"github.com/aws/karpenter-provider-aws/pkg/operator/options"
"github.com/aws/karpenter-provider-aws/pkg/providers/instance"
Expand All @@ -51,6 +54,7 @@ var ctx context.Context
var env *coretest.Environment
var awsEnv *test.Environment
var cloudProvider *cloudprovider.CloudProvider
var statusController controller.Controller

func TestAWS(t *testing.T) {
ctx = TestContextWithLogger(t)
Expand All @@ -65,6 +69,14 @@ var _ = BeforeSuite(func() {
awsEnv = test.NewEnvironment(ctx, env)
cloudProvider = cloudprovider.New(awsEnv.InstanceTypesProvider, awsEnv.InstanceProvider, events.NewRecorder(&record.FakeRecorder{}),
env.Client, awsEnv.AMIProvider, awsEnv.SecurityGroupProvider, awsEnv.SubnetProvider)
statusController = status.NewController(
env.Client,
awsEnv.SubnetProvider,
awsEnv.SecurityGroupProvider,
awsEnv.AMIProvider,
awsEnv.InstanceProfileProvider,
awsEnv.LaunchTemplateProvider,
)
})

var _ = AfterSuite(func() {
Expand Down Expand Up @@ -111,6 +123,8 @@ var _ = Describe("InstanceProvider", func() {
})
It("should return an ICE error when all attempted instance types return an ICE error", func() {
ExpectApplied(ctx, env.Client, nodeClaim, nodePool, nodeClass)
ExpectReconcileSucceeded(ctx, statusController, client.ObjectKeyFromObject(nodeClass))
nodeClass = ExpectExists(ctx, env.Client, nodeClass)
awsEnv.EC2API.InsufficientCapacityPools.Set([]fake.CapacityPool{
{CapacityType: corev1beta1.CapacityTypeOnDemand, InstanceType: "m5.xlarge", Zone: "test-zone-1a"},
{CapacityType: corev1beta1.CapacityTypeOnDemand, InstanceType: "m5.xlarge", Zone: "test-zone-1b"},
Expand Down
Loading

0 comments on commit 2cab316

Please sign in to comment.