Skip to content

Commit

Permalink
test: Fix E2E test permissions and Upgrade testing (aws#4880)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis authored and johngmyers committed May 31, 2024
1 parent ef3637d commit 3fcdb6f
Show file tree
Hide file tree
Showing 22 changed files with 125 additions and 242 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/e2e-upgrade.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ jobs:
- name: run the Upgrade test suite
run: |
aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }}
CLUSTER_NAME=${{ env.CLUSTER_NAME }} INTERRUPTION_QUEUE=${{ env.CLUSTER_NAME }} CLUSTER_ENDPOINT="$(aws eks describe-cluster --name ${{ env.CLUSTER_NAME }} --query "cluster.endpoint" --output text)" TEST_SUITE="Integration" make e2etests
CLUSTER_NAME=${{ env.CLUSTER_NAME }} INTERRUPTION_QUEUE=${{ env.CLUSTER_NAME }} CLUSTER_ENDPOINT="$(aws eks describe-cluster --name ${{ env.CLUSTER_NAME }} --query "cluster.endpoint" --output text)" TEST_SUITE="Beta/Integration" make e2etests
- name: notify slack of success or failure
uses: ./.github/actions/e2e/slack/notify
if: (success() || failure()) && github.event_name != 'workflow_run' && github.event_name != 'conformance'
Expand Down
197 changes: 30 additions & 167 deletions test/cloudformation/iam_cloudformation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -270,116 +270,7 @@ Resources:
Statement:
- Effect: Allow
Action:
# Tag Permissions
- ec2:DescribeTags
# Internet Gateway Permissions
- ec2:DescribeEgressOnlyInternetGateways
- ec2:DescribeInternetGateways
# Elastic IP Permissions
- ec2:DescribeAddresses
# Instance Permissions
- ec2:DescribeInstanceTypeOfferings
- ec2:DescribeInstanceTypes
- ec2:DescribeInstances
- ec2:DescribeKeyPairs
# Launch Template Permissions
- ec2:DescribeLaunchTemplateVersions
- ec2:DescribeLaunchTemplates
# NAT Gateway Permissions
- ec2:DescribeNatGateways
# Network Interface Permissions
- ec2:DescribeNetworkInterfaces
# Route Table Permissions
- ec2:DescribeRouteTables
# Security Group Permissions
- ec2:DescribeSecurityGroups
# Subnet Permissions
- ec2:DescribeAvailabilityZones
- ec2:DescribeSubnets
# Volume Permissions
- ec2:DescribeVolumes
- ec2:DescribeVolumesModifications
- ec2:DescribeSnapshots
# Network ACL Permissions
- ec2:DescribeNetworkAcls
# VPC Permissions
- ec2:DescribeVpcs
# Image Permissions
- ec2:DescribeImages
# Tag Permissions
- ec2:CreateTags
- ec2:DeleteTags
# Internet Gateway Permissions
- ec2:CreateEgressOnlyInternetGateway
- ec2:DeleteEgressOnlyInternetGateway
# Elastic IP Permissions
- ec2:AllocateAddress
- ec2:ReleaseAddress
# Instance Permissions
- ec2:ModifyInstanceAttribute
- ec2:DescribeInstanceAttribute
- ec2:RunInstances
- ec2:StopInstances
- ec2:TerminateInstances
- ec2:AttachNetworkInterface
- ec2:ModifyNetworkInterfaceAttribute
- ec2:DetachNetworkInterface
# Internet Gateway Permissions
- ec2:AttachInternetGateway
- ec2:CreateInternetGateway
- ec2:DeleteInternetGateway
- ec2:DetachInternetGateway
# Launch Template Permissions
- ec2:CreateLaunchTemplate
- ec2:DeleteLaunchTemplate
# Fleet Permissions
- ec2:CreateFleet
# NAT Gateway Permissions
- ec2:CreateNatGateway
- ec2:DeleteNatGateway
# Network Interface Permissions
- ec2:AssignPrivateIpAddresses
- ec2:UnassignPrivateIpAddresses
- ec2:AssignIpv6Addresses
- ec2:UnassignIpv6Addresses
- ec2:AttachNetworkInterface
- ec2:DetachNetworkInterface
- ec2:CreateNetworkInterface
- ec2:ModifyNetworkInterfaceAttribute
- ec2:DeleteNetworkInterface
- ec2:CreateNetworkInterfacePermission
# Route Table Permissions
- ec2:CreateRoute
- ec2:CreateRouteTable
- ec2:DeleteRoute
- ec2:DeleteRouteTable
- ec2:AssociateRouteTable
- ec2:DisassociateRouteTable
# Security Group Permissions
- ec2:AuthorizeSecurityGroupIngress
- ec2:CreateSecurityGroup
- ec2:DeleteSecurityGroup
- ec2:RevokeSecurityGroupIngress
# Subnet Permissions
- ec2:CreateSubnet
- ec2:DeleteSubnet
- ec2:ModifySubnetAttribute
# Volume Permissions
- ec2:CreateSnapshot
- ec2:DeleteSnapshot
- ec2:CreateVolume
- ec2:DeleteVolume
- ec2:AttachVolume
- ec2:ModifyVolume
- ec2:DetachVolume
# VPC Permissions
- ec2:AssociateVpcCidrBlock
- ec2:DisassociateVpcCidrBlock
- ec2:CreateVpc
- ec2:DeleteVpc
- ec2:DescribeVpcAttribute
- ec2:ModifyVpcAttribute
- ec2:RunInstances
- ec2:*
# Read-Only Permissions to pull ECR images needed by the NodeInstanceRole
- ecr:GetAuthorizationToken
- ecr:BatchCheckLayerAvailability
Expand All @@ -397,82 +288,54 @@ Resources:
- autoscaling:DescribeAutoScalingGroups
- autoscaling:UpdateAutoScalingGroup
# EKS ServiceRole permissions needed to handle LoadBalancer
- elasticloadbalancing:AddTags
- elasticloadbalancing:ApplySecurityGroupsToLoadBalancer
- elasticloadbalancing:AttachLoadBalancerToSubnets
- elasticloadbalancing:ConfigureHealthCheck
- elasticloadbalancing:CreateListener
- elasticloadbalancing:CreateLoadBalancer
- elasticloadbalancing:CreateLoadBalancerListeners
- elasticloadbalancing:CreateLoadBalancerPolicy
- elasticloadbalancing:CreateTargetGroup
- elasticloadbalancing:DeleteListener
- elasticloadbalancing:DeleteLoadBalancer
- elasticloadbalancing:DeleteLoadBalancerListeners
- elasticloadbalancing:DeleteTargetGroup
- elasticloadbalancing:DeregisterInstancesFromLoadBalancer
- elasticloadbalancing:DeregisterTargets
- elasticloadbalancing:DescribeListeners
- elasticloadbalancing:DescribeLoadBalancerAttributes
- elasticloadbalancing:DescribeLoadBalancerPolicies
- elasticloadbalancing:DescribeLoadBalancers
- elasticloadbalancing:DescribeTargetGroupAttributes
- elasticloadbalancing:DescribeTargetGroups
- elasticloadbalancing:DescribeTargetHealth
- elasticloadbalancing:DetachLoadBalancerFromSubnets
- elasticloadbalancing:ModifyListener
- elasticloadbalancing:ModifyLoadBalancerAttributes
- elasticloadbalancing:ModifyTargetGroup
- elasticloadbalancing:ModifyTargetGroupAttributes
- elasticloadbalancing:RegisterInstancesWithLoadBalancer
- elasticloadbalancing:RegisterTargets
- elasticloadbalancing:SetLoadBalancerPoliciesForBackendServer
- elasticloadbalancing:SetLoadBalancerPoliciesOfListener
- elasticloadbalancing:*
- kms:CreateGrant
- kms:GenerateDataKeyWithoutPlaintext
- kms:DescribeKey
# SSM Permissions for AmazonSSMManagedInstanceCore policy applied to the NodeInstanceRole
- ssm:DescribeAssociation
- ssm:GetDeployablePatchSnapshotForInstance
- ssm:GetDocument
- ssm:DescribeDocument
- ssm:GetManifest
- ssm:GetParameter
- ssm:GetParameters
- ssm:ListAssociations
- ssm:ListInstanceAssociations
- ssm:PutInventory
- ssm:PutComplianceItems
- ssm:PutConfigurePackageResult
- ssm:UpdateAssociationStatus
- ssm:UpdateInstanceAssociationStatus
- ssm:UpdateInstanceInformation
- ssm:*
# SSM Permissions for AmazonSSMManagedInstanceCore policy applied to the NodeInstanceRole
- ssmmessages:CreateControlChannel
- ssmmessages:CreateDataChannel
- ssmmessages:OpenControlChannel
- ssmmessages:OpenDataChannel
- ssmmessages:*
# SSM Permissions for AmazonSSMManagedInstanceCore policy applied to the NodeInstanceRole
- ec2messages:AcknowledgeMessage
- ec2messages:DeleteMessage
- ec2messages:FailMessage
- ec2messages:GetEndpoint
- ec2messages:GetMessages
- ec2messages:SendReply
- ec2messages:*
- sqs:DeleteMessage
- sqs:GetQueueAttributes
- sqs:GetQueueUrl
- sqs:SendMessage
- sqs:ReceiveMessage
- pricing:GetProducts
- ec2:DescribeSpotPriceHistory
- eks:DescribeCluster
Resource: "*"
- Effect: Allow
Action: iam:PassRole
Resource:
- !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/KarpenterNodeRole-*"
- !GetAtt FISInterruptionRole.Arn
- Effect: Allow
Action: iam:CreateInstanceProfile
Resource: "*"
Condition:
StringLike:
aws:RequestTag/karpenter.k8s.aws/ec2nodeclass: "*"
- Effect: Allow
Action: iam:TagInstanceProfile
Resource: "*"
Condition:
StringLike:
aws:RequestTag/karpenter.k8s.aws/ec2nodeclass: "*"
aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass: "*"
- Effect: Allow
Action:
- iam:AddRoleToInstanceProfile
- iam:RemoveRoleFromInstanceProfile
- iam:DeleteInstanceProfile
Resource: "*"
Condition:
StringLike:
aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass: "*"
- Effect: Allow
Action: iam:GetInstanceProfile
Resource: "*"
- Effect: Allow
Action:
- aps:RemoteWrite
Expand Down
4 changes: 4 additions & 0 deletions test/pkg/environment/aws/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ import (

const WindowsDefaultImage = "mcr.microsoft.com/oss/kubernetes/pause:3.9"

// ExcludedInstanceFamilies denotes instance families that have issues during resource registration due to compatibility
// issues with versions of the VPC Resource Controller
var ExcludedInstanceFamilies = []string{"m7a", "r7a", "c7a", "r7i"}

type Environment struct {
*common.Environment
Region string
Expand Down
9 changes: 9 additions & 0 deletions test/pkg/environment/aws/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@ import (
"github.com/aws/aws-sdk-go/service/iam"
"github.com/aws/aws-sdk-go/service/ssm"
"github.com/aws/aws-sdk-go/service/sts"
"github.com/mitchellh/hashstructure/v2"
. "github.com/onsi/ginkgo/v2" //nolint:revive,stylecheck
. "github.com/onsi/gomega" //nolint:revive,stylecheck
"github.com/samber/lo"
"go.uber.org/multierr"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"

"github.com/aws/karpenter/pkg/apis/v1beta1"
)

// Spot Interruption experiment details partially copied from
Expand Down Expand Up @@ -121,6 +124,12 @@ func (env *Environment) ExpectInstanceProfileExists(profileName string) iam.Inst
return lo.FromPtr(out.InstanceProfile)
}

// GetInstanceProfileName returns the instance profile name for the given NodeClass in the form
// "<ClusterName>_<hash>", where the hash is computed over the environment's Region concatenated
// with the NodeClass name and rendered as a decimal integer.
// It panics (via lo.Must) if hashing fails.
// NOTE(review): the result stays within the 128-character instance profile name limit only while
// the cluster name leaves room for the separator and the hash digits — confirm against cluster naming.
func (env *Environment) GetInstanceProfileName(nodeClass *v1beta1.EC2NodeClass) string {
	// Region and NodeClass name are joined with no separator before hashing.
	hashInput := env.Region + nodeClass.Name
	digest := lo.Must(hashstructure.Hash(hashInput, hashstructure.FormatV2, nil))
	return fmt.Sprintf("%s_%d", env.ClusterName, digest)
}

func (env *Environment) GetInstance(nodeName string) ec2.Instance {
node := env.Environment.GetNode(nodeName)
return env.GetInstanceByID(env.ExpectParsedProviderID(node.Spec.ProviderID))
Expand Down
4 changes: 3 additions & 1 deletion test/pkg/environment/common/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/samber/lo"

"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/apis/v1beta1"
"github.com/aws/karpenter-core/pkg/utils/resources"
)

Expand Down Expand Up @@ -223,7 +224,8 @@ func (m *Monitor) nodeUtilization(resource v1.ResourceName) []float64 {
for nodeName, requests := range st.nodeRequests {
allocatable := st.nodes[nodeName].Status.Allocatable[resource]
// skip any nodes we didn't launch
if _, ok := st.nodes[nodeName].Labels[v1alpha5.ProvisionerNameLabelKey]; !ok {
if st.nodes[nodeName].Labels[v1alpha5.ProvisionerNameLabelKey] == "" &&
st.nodes[nodeName].Labels[v1beta1.NodePoolLabelKey] == "" {
continue
}
if allocatable.IsZero() {
Expand Down
18 changes: 5 additions & 13 deletions test/suites/alpha/drift/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ var _ = Describe("Drift", Label("AWS"), func() {
},
})
env.ExpectSettingsOverriddenLegacy(map[string]string{"featureGates.driftEnabled": "true"})
env.ExpectSettingsOverridden(v1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=true"})
})
It("should deprovision nodes that have drifted due to AMIs", func() {
// choose an old static image
Expand Down Expand Up @@ -122,7 +121,6 @@ var _ = Describe("Drift", Label("AWS"), func() {
})
It("should not deprovision nodes that have drifted without the featureGate enabled", func() {
env.ExpectSettingsOverriddenLegacy(map[string]string{"featureGates.driftEnabled": "false"})
env.ExpectSettingsOverridden(v1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=false"})
// choose an old static image
parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{
Name: awssdk.String("/aws/service/eks/optimized-ami/1.23/amazon-linux-2/amazon-eks-node-1.23-v20230322/image_id"),
Expand Down Expand Up @@ -377,14 +375,11 @@ var _ = Describe("Drift", Label("AWS"), func() {
}).Should(Succeed())

// Expect nodes To get cordoned
cordonedNodes := env.EventuallyExpectCordonedNodeCount("==", 1)
cordonedNodes := env.EventuallyExpectCordonedNodeCountLegacy("==", 1)

// Drift should fail and the original node should be uncordoned
// TODO: reduce timeouts when deprovisioning waits are factored out
Eventually(func(g Gomega) {
g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(cordonedNodes[0]), cordonedNodes[0]))
g.Expect(cordonedNodes[0].Spec.Unschedulable).To(BeFalse())
}).WithTimeout(11 * time.Minute).Should(Succeed())
env.EventuallyExpectNodesUncordonedLegacyWithTimeout(11*time.Minute, cordonedNodes...)

Eventually(func(g Gomega) {
machines := &v1alpha5.MachineList{}
Expand Down Expand Up @@ -435,14 +430,11 @@ var _ = Describe("Drift", Label("AWS"), func() {
}).Should(Succeed())

// Expect nodes To be cordoned
cordonedNodes := env.EventuallyExpectCordonedNodeCount("==", 1)
cordonedNodes := env.EventuallyExpectCordonedNodeCountLegacy("==", 1)

// Drift should fail and original node should be uncordoned
// TODO: reduce timeouts when deprovisioning waits are factored outr
Eventually(func(g Gomega) {
g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(cordonedNodes[0]), cordonedNodes[0]))
g.Expect(cordonedNodes[0].Spec.Unschedulable).To(BeFalse())
}).WithTimeout(12 * time.Minute).Should(Succeed())
// TODO: reduce timeouts when deprovisioning waits are factored out
env.EventuallyExpectNodesUncordonedLegacyWithTimeout(11*time.Minute, cordonedNodes...)

// Expect that the new machine/node is kept around after the un-cordon
nodeList := &v1.NodeList{}
Expand Down
5 changes: 2 additions & 3 deletions test/suites/alpha/expiration/expiration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ var _ = Describe("Expiration", func() {
TTLSecondsUntilExpired: ptr.Int64(30),
})
env.ExpectSettingsOverriddenLegacy(map[string]string{"featureGates.driftEnabled": "false"})
env.ExpectSettingsOverridden(v1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=true"})
})
It("should expire the node after the TTLSecondsUntilExpired is reached", func() {
var numPods int32 = 1
Expand Down Expand Up @@ -238,7 +237,7 @@ var _ = Describe("Expiration", func() {
}).Should(Succeed())

// Expect nodes To get cordoned
cordonedNodes := env.EventuallyExpectCordonedNodeCount("==", 1)
cordonedNodes := env.EventuallyExpectCordonedNodeCountLegacy("==", 1)

// Expire should fail and the original node should be uncordoned
// TODO: reduce timeouts when deprovisioning waits are factored out
Expand Down Expand Up @@ -306,7 +305,7 @@ var _ = Describe("Expiration", func() {
}).Should(Succeed())

// Expect nodes To be cordoned
cordonedNodes := env.EventuallyExpectCordonedNodeCount("==", 1)
cordonedNodes := env.EventuallyExpectCordonedNodeCountLegacy("==", 1)

// Expire should fail and original node should be uncordoned and no machines should be removed
// TODO: reduce timeouts when deprovisioning waits are factored out
Expand Down
2 changes: 1 addition & 1 deletion test/suites/alpha/integration/ami_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ var _ = Describe("AMI", func() {
{
Key: v1alpha1.LabelInstanceFamily,
Operator: v1.NodeSelectorOpNotIn,
Values: []string{"m7a", "r7a", "c7a"},
Values: awsenv.ExcludedInstanceFamilies,
},
{
Key: v1alpha1.LabelInstanceCategory,
Expand Down
3 changes: 2 additions & 1 deletion test/suites/alpha/integration/extended_resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/test"
"github.com/aws/karpenter/pkg/apis/v1alpha1"
awsenv "github.com/aws/karpenter/test/pkg/environment/aws"

awstest "github.com/aws/karpenter/pkg/test"
)
Expand Down Expand Up @@ -135,7 +136,7 @@ var _ = Describe("Extended Resources", func() {
{
Key: v1alpha1.LabelInstanceFamily,
Operator: v1.NodeSelectorOpNotIn,
Values: []string{"m7a", "r7a"},
Values: awsenv.ExcludedInstanceFamilies,
},
},
})
Expand Down
Loading

0 comments on commit 3fcdb6f

Please sign in to comment.