feat: Placement Group Support #5307
@@ -105,6 +105,9 @@ type EC2NodeClassSpec struct {
	// +kubebuilder:default={"httpEndpoint":"enabled","httpProtocolIPv6":"disabled","httpPutResponseHopLimit":2,"httpTokens":"required"}
	// +optional
	MetadataOptions *MetadataOptions `json:"metadataOptions,omitempty"`
	// PlacementGroupSelectorTerms is a list of PlacementGroupSelector. The terms are ORed.
	// +optional
	PlacementGroupSelectorTerms []PlacementGroupSelectorTerm `json:"placementGroupSelectorTerms,omitempty" hash:"ignore"`
	// Context is a Reserved field in EC2 APIs
	// https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_CreateFleet.html
	// +optional
@@ -167,6 +170,14 @@ type AMISelectorTerm struct {
	Owner string `json:"owner,omitempty"`
}

// PlacementGroupSelectorTerm defines the selection logic for ec2 placement groups
// that are used to launch nodes. If multiple fields are used for selection, the requirements are ANDed
type PlacementGroupSelectorTerm struct {
	// Name of the placement group to be selected
	// +optional
	Name string `json:"name,omitempty"`
Review comment: I see that it's possible to also use tags here as well. I wonder if we should also support tagging initially, similar to the other selectors.

Review comment: One thing that's nice about tagging and naming is that it works consistently across accounts, so if you have similar set-ups for different accounts, everything just works out-of-the-box. This is the general philosophy that Karpenter has taken towards selectors in the project, ensuring that we can move Karpenter from one account to another, or from one region to another, and everything "just works." The trade-off of opening up this selector to tags is that multiple placement groups can now be returned, so how do you pick between them? There probably needs to be a consistent ordering, and the selection between them shouldn't be random, particularly when it comes to the interaction of this feature with the drift feature.
}

// MetadataOptions contains parameters for specifying the exposure of the
// Instance Metadata Service to provisioned EC2 nodes.
type MetadataOptions struct {
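The review comments above ask whether tag-based selection should be supported, in line with the subnet and security group selectors. As a rough illustration (not part of this PR), the selector term could grow a `Tags` field with the same AND-within-a-term, OR-across-terms semantics; the field name and comments below are assumptions:

```go
// Hypothetical extension, mirroring SubnetSelectorTerm/SecurityGroupSelectorTerm;
// not part of this PR.
type PlacementGroupSelectorTerm struct {
	// Tags is a map of key/value tags used to select placement groups.
	// Specifying '*' for a value selects all values for a given tag key.
	// +optional
	Tags map[string]string `json:"tags,omitempty"`
	// Name of the placement group to be selected
	// +optional
	Name string `json:"name,omitempty"`
}
```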
@@ -63,6 +63,9 @@ type EC2NodeClassStatus struct {
	// cluster under the AMI selectors.
	// +optional
	AMIs []AMI `json:"amis,omitempty"`
	// PlacementGroups contains the ec2 placement group arns
	// +optional
	PlacementGroups []string `json:"placementGroups,omitempty"`
Review comment: Should we resolve the strategy into the status block here? If the placement group type is …
	// InstanceProfile contains the resolved instance profile for the role
	// +optional
	InstanceProfile string `json:"instanceProfile,omitempty"`
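Following the review comment above about resolving the strategy into the status, one option would be to promote the status entry from a bare ARN string to a small struct. This is a sketch only; the type and field names are assumptions, not part of this PR:

```go
// Hypothetical status entry that carries the resolved strategy alongside the
// ARN, so the launch path can branch on cluster/partition/spread without
// another DescribePlacementGroups call. Illustrative only.
type PlacementGroup struct {
	// ARN of the resolved placement group
	ARN string `json:"arn"`
	// Name of the resolved placement group
	Name string `json:"name,omitempty"`
	// Strategy of the placement group: cluster, partition, or spread
	Strategy string `json:"strategy,omitempty"`
}

// The status field would then become:
//   PlacementGroups []PlacementGroup `json:"placementGroups,omitempty"`
```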
@@ -45,6 +45,7 @@ import (
	"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"
	"github.com/aws/karpenter-provider-aws/pkg/providers/amifamily"
	"github.com/aws/karpenter-provider-aws/pkg/providers/instanceprofile"
	"github.com/aws/karpenter-provider-aws/pkg/providers/placementgroup"
	"github.com/aws/karpenter-provider-aws/pkg/providers/securitygroup"
	"github.com/aws/karpenter-provider-aws/pkg/providers/subnet"
)
@@ -56,17 +57,19 @@ type Controller struct {
	securityGroupProvider *securitygroup.Provider
	amiProvider *amifamily.Provider
	instanceProfileProvider *instanceprofile.Provider
	placementGroupProvider *placementgroup.Provider
}

func NewController(kubeClient client.Client, recorder events.Recorder, subnetProvider *subnet.Provider, securityGroupProvider *securitygroup.Provider,
	amiProvider *amifamily.Provider, instanceProfileProvider *instanceprofile.Provider) *Controller {
	amiProvider *amifamily.Provider, instanceProfileProvider *instanceprofile.Provider, placementGroupProvider *placementgroup.Provider) *Controller {
	return &Controller{
		kubeClient: kubeClient,
		recorder: recorder,
		subnetProvider: subnetProvider,
		securityGroupProvider: securityGroupProvider,
		amiProvider: amiProvider,
		instanceProfileProvider: instanceProfileProvider,
		placementGroupProvider: placementGroupProvider,
	}
}
@@ -79,6 +82,7 @@ func (c *Controller) Reconcile(ctx context.Context, nodeClass *v1beta1.EC2NodeCl
		c.resolveSecurityGroups(ctx, nodeClass),
		c.resolveAMIs(ctx, nodeClass),
		c.resolveInstanceProfile(ctx, nodeClass),
		c.resolvePlacementGroups(ctx, nodeClass),
	)
	if !equality.Semantic.DeepEqual(stored, nodeClass) {
		statusCopy := nodeClass.DeepCopy()
@@ -194,6 +198,17 @@ func (c *Controller) resolveAMIs(ctx context.Context, nodeClass *v1beta1.EC2Node
	return nil
}

func (c *Controller) resolvePlacementGroups(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) error {
	result, err := c.placementGroupProvider.Get(ctx, nodeClass)
	if err != nil {
		return err
	}
	if result != nil {
		nodeClass.Status.PlacementGroups = append(nodeClass.Status.PlacementGroups, *result.GroupArn)
Review comment: This comment is relevant here, but its instructions for proceeding are unclear: https://github.com/aws/karpenter-provider-aws/pull/4553/files#r1405582171
	}
	return nil
}

func (c *Controller) resolveInstanceProfile(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) error {
	if nodeClass.Spec.Role != "" {
		name, err := c.instanceProfileProvider.Create(ctx, nodeClass)

@@ -215,9 +230,9 @@ type NodeClassController struct {
}

func NewNodeClassController(kubeClient client.Client, recorder events.Recorder, subnetProvider *subnet.Provider, securityGroupProvider *securitygroup.Provider,
	amiProvider *amifamily.Provider, instanceProfileProvider *instanceprofile.Provider) corecontroller.Controller {
	amiProvider *amifamily.Provider, instanceProfileProvider *instanceprofile.Provider, placementProvider *placementgroup.Provider) corecontroller.Controller {
	return corecontroller.Typed[*v1beta1.EC2NodeClass](kubeClient, &NodeClassController{
		Controller: NewController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider),
		Controller: NewController(kubeClient, recorder, subnetProvider, securityGroupProvider, amiProvider, instanceProfileProvider, placementProvider),
	})
}
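Regarding the review comment on resolvePlacementGroups above: the reconciler appends to nodeClass.Status.PlacementGroups on every pass and never clears it, so repeated reconciles could accumulate duplicate ARNs. A minimal sketch of an overwrite-style alternative follows; it assumes the same file and imports as the PR plus the "github.com/aws/aws-sdk-go/aws" helper package, and is not the PR's actual code:

```go
// Sketch: overwrite the status instead of appending, so repeated reconciles
// do not accumulate duplicates and stale entries are cleared when no
// placement group matches.
func (c *Controller) resolvePlacementGroups(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) error {
	result, err := c.placementGroupProvider.Get(ctx, nodeClass)
	if err != nil {
		return err
	}
	if result == nil {
		nodeClass.Status.PlacementGroups = nil
		return nil
	}
	nodeClass.Status.PlacementGroups = []string{aws.StringValue(result.GroupArn)}
	return nil
}
```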
@@ -0,0 +1,88 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package placementgroup

import (
	"context"
	"fmt"
	"sync"

	"github.com/aws/aws-sdk-go/service/ec2"
	"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
	"github.com/mitchellh/hashstructure/v2"
	"github.com/patrickmn/go-cache"

	"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"

	"knative.dev/pkg/logging"

	"sigs.k8s.io/karpenter/pkg/utils/pretty"
)
|
||
type Provider struct { | ||
sync.RWMutex | ||
ec2api ec2iface.EC2API | ||
cache *cache.Cache | ||
cm *pretty.ChangeMonitor | ||
} | ||
|
||
func NewProvider(ec2api ec2iface.EC2API, cache *cache.Cache) *Provider { | ||
return &Provider{ | ||
ec2api: ec2api, | ||
cm: pretty.NewChangeMonitor(), | ||
// TODO: Remove cache for v1beta1, utilize resolved subnet from the AWSNodeTemplate.status | ||
// Subnets are sorted on AvailableIpAddressCount, descending order | ||
cache: cache, | ||
} | ||
} | ||
|
||
func (p *Provider) Get(ctx context.Context, nodeClass *v1beta1.EC2NodeClass) (*ec2.PlacementGroup, error) { | ||
p.Lock() | ||
defer p.Unlock() | ||
|
||
// Get selectors from the nodeClass, exit if no selectors defined | ||
selectors := nodeClass.Spec.PlacementGroupSelectorTerms | ||
if selectors == nil { | ||
return nil, nil | ||
} | ||
|
||
// Look for a cached result | ||
hash, err := hashstructure.Hash(selectors, hashstructure.FormatV2, &hashstructure.HashOptions{SlicesAsSets: true}) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if cached, ok := p.cache.Get(fmt.Sprint(hash)); ok { | ||
return cached.(*ec2.PlacementGroup), nil | ||
} | ||
|
||
var match *ec2.PlacementGroup | ||
// Look up all ec2 placement groups | ||
output, err := p.ec2api.DescribePlacementGroupsWithContext(ctx, &ec2.DescribePlacementGroupsInput{}) | ||
if err != nil { | ||
logging.FromContext(ctx).Errorf("discovering placement groups, %w", err) | ||
return nil, err | ||
} | ||
for i := range output.PlacementGroups { | ||
// filter results to only include those that match at least 1 selector | ||
for x := range selectors { | ||
if *output.PlacementGroups[i].GroupName == selectors[x].Name { | ||
match = output.PlacementGroups[i] | ||
p.cache.SetDefault(fmt.Sprint(hash), match) | ||
break | ||
} | ||
} | ||
} | ||
return match, nil | ||
} |
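Related to the earlier review comment about consistent ordering: Get keeps the last match returned by DescribePlacementGroups, so the result can depend on API ordering if several groups match (for example, with tag-based terms). A small, hypothetical helper that picks a match deterministically by sorting on group name is sketched below; it assumes the placementgroup package plus the "sort" and "github.com/aws/aws-sdk-go/aws" imports, and is not part of this PR:

```go
// Hypothetical helper: choose deterministically among multiple matching
// placement groups by sorting on group name and taking the first entry,
// rather than relying on DescribePlacementGroups ordering.
func pickDeterministic(matches []*ec2.PlacementGroup) *ec2.PlacementGroup {
	if len(matches) == 0 {
		return nil
	}
	sort.Slice(matches, func(i, j int) bool {
		return aws.StringValue(matches[i].GroupName) < aws.StringValue(matches[j].GroupName)
	})
	return matches[0]
}
```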
Review comment: There's a question here of how we should pass requirements down when using something like `cluster` placement groups, or when using `spread` with `rack`. When using `spread` with `rack`, the EC2 documentation mentions that you can't launch more than 7 instances in a single AZ (https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/placement-groups.html#:~:text=A%20rack%20spread%20placement%20group%20supports%20a%20maximum%20of%20seven%20running%20instances%20per%20Availability%20Zone), which means that we are restricted in our node limits when using this type of placement group. One option is to constrain our requirements when using this type of placement group, so that when we run out of instances to launch in a single AZ, we simply drop that AZ from our requirements and start launching in other AZs. Another option is to allow selecting multiple placement groups, and if we run out of space in one of the `spread` placement groups we just move to the next one. This is handled differently, though, when we are dealing with `cluster` or `partition` placement groups, where there is no limit on the number of instances that can be launched, and it makes less sense to allow multiple placement groups to be selected.

Based on all of these considerations, I think we should write up a small design that considers the use-cases around placement groups, how they meld with requirements, and what the API surface should look like.