Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test: Fix E2E test permissions, upgrade, consolidation, and integration tests #4880

Merged
merged 9 commits into from
Oct 23, 2023
2 changes: 1 addition & 1 deletion .github/workflows/e2e-upgrade.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ jobs:
- name: run the Upgrade test suite
run: |
aws eks update-kubeconfig --name ${{ env.CLUSTER_NAME }}
CLUSTER_NAME=${{ env.CLUSTER_NAME }} INTERRUPTION_QUEUE=${{ env.CLUSTER_NAME }} CLUSTER_ENDPOINT="$(aws eks describe-cluster --name ${{ env.CLUSTER_NAME }} --query "cluster.endpoint" --output text)" TEST_SUITE="Integration" make e2etests
CLUSTER_NAME=${{ env.CLUSTER_NAME }} INTERRUPTION_QUEUE=${{ env.CLUSTER_NAME }} CLUSTER_ENDPOINT="$(aws eks describe-cluster --name ${{ env.CLUSTER_NAME }} --query "cluster.endpoint" --output text)" TEST_SUITE="Beta/Integration" make e2etests
- name: notify slack of success or failure
uses: ./.github/actions/e2e/slack/notify
if: (success() || failure()) && github.event_name != 'workflow_run' && github.event_name != 'conformance'
Expand Down
197 changes: 30 additions & 167 deletions test/cloudformation/iam_cloudformation.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -270,116 +270,7 @@ Resources:
Statement:
- Effect: Allow
Action:
# Tag Permissions
- ec2:DescribeTags
# Internet Gateway Permissions
- ec2:DescribeEgressOnlyInternetGateways
- ec2:DescribeInternetGateways
# Elastic IP Permissions
- ec2:DescribeAddresses
# Instance Permissions
- ec2:DescribeInstanceTypeOfferings
- ec2:DescribeInstanceTypes
- ec2:DescribeInstances
- ec2:DescribeKeyPairs
# Launch Template Permissions
- ec2:DescribeLaunchTemplateVersions
- ec2:DescribeLaunchTemplates
# NAT Gateway Permissions
- ec2:DescribeNatGateways
# Network Interface Permissions
- ec2:DescribeNetworkInterfaces
# Route Table Permissions
- ec2:DescribeRouteTables
# Security Group Permissions
- ec2:DescribeSecurityGroups
# Subnet Permissions
- ec2:DescribeAvailabilityZones
- ec2:DescribeSubnets
# Volume Permissions
- ec2:DescribeVolumes
- ec2:DescribeVolumesModifications
- ec2:DescribeSnapshots
# Network ACL Permissions
- ec2:DescribeNetworkAcls
# VPC Permissions
- ec2:DescribeVpcs
# Image Permissions
- ec2:DescribeImages
# Tag Permissions
- ec2:CreateTags
- ec2:DeleteTags
# Internet Gateway Permissions
- ec2:CreateEgressOnlyInternetGateway
- ec2:DeleteEgressOnlyInternetGateway
# Elastic IP Permissions
- ec2:AllocateAddress
- ec2:ReleaseAddress
# Instance Permissions
- ec2:ModifyInstanceAttribute
- ec2:DescribeInstanceAttribute
- ec2:RunInstances
- ec2:StopInstances
- ec2:TerminateInstances
- ec2:AttachNetworkInterface
- ec2:ModifyNetworkInterfaceAttribute
- ec2:DetachNetworkInterface
# Internet Gateway Permissions
- ec2:AttachInternetGateway
- ec2:CreateInternetGateway
- ec2:DeleteInternetGateway
- ec2:DetachInternetGateway
# Launch Template Permissions
- ec2:CreateLaunchTemplate
- ec2:DeleteLaunchTemplate
# Fleet Permissions
- ec2:CreateFleet
# NAT Gateway Permissions
- ec2:CreateNatGateway
- ec2:DeleteNatGateway
# Network Interface Permissions
- ec2:AssignPrivateIpAddresses
- ec2:UnassignPrivateIpAddresses
- ec2:AssignIpv6Addresses
- ec2:UnassignIpv6Addresses
- ec2:AttachNetworkInterface
- ec2:DetachNetworkInterface
- ec2:CreateNetworkInterface
- ec2:ModifyNetworkInterfaceAttribute
- ec2:DeleteNetworkInterface
- ec2:CreateNetworkInterfacePermission
# Route Table Permissions
- ec2:CreateRoute
- ec2:CreateRouteTable
- ec2:DeleteRoute
- ec2:DeleteRouteTable
- ec2:AssociateRouteTable
- ec2:DisassociateRouteTable
# Security Group Permissions
- ec2:AuthorizeSecurityGroupIngress
- ec2:CreateSecurityGroup
- ec2:DeleteSecurityGroup
- ec2:RevokeSecurityGroupIngress
# Subnet Permissions
- ec2:CreateSubnet
- ec2:DeleteSubnet
- ec2:ModifySubnetAttribute
# Volume Permissions
- ec2:CreateSnapshot
- ec2:DeleteSnapshot
- ec2:CreateVolume
- ec2:DeleteVolume
- ec2:AttachVolume
- ec2:ModifyVolume
- ec2:DetachVolume
# VPC Permissions
- ec2:AssociateVpcCidrBlock
- ec2:DisassociateVpcCidrBlock
- ec2:CreateVpc
- ec2:DeleteVpc
- ec2:DescribeVpcAttribute
- ec2:ModifyVpcAttribute
- ec2:RunInstances
- ec2:*
# Read-Only Permissions to pull ECR images needed by the NodeInstanceRole
- ecr:GetAuthorizationToken
- ecr:BatchCheckLayerAvailability
Expand All @@ -397,82 +288,54 @@ Resources:
- autoscaling:DescribeAutoScalingGroups
- autoscaling:UpdateAutoScalingGroup
# EKS ServiceRole permissions needed to handle LoadBalancer
- elasticloadbalancing:AddTags
- elasticloadbalancing:ApplySecurityGroupsToLoadBalancer
- elasticloadbalancing:AttachLoadBalancerToSubnets
- elasticloadbalancing:ConfigureHealthCheck
- elasticloadbalancing:CreateListener
- elasticloadbalancing:CreateLoadBalancer
- elasticloadbalancing:CreateLoadBalancerListeners
- elasticloadbalancing:CreateLoadBalancerPolicy
- elasticloadbalancing:CreateTargetGroup
- elasticloadbalancing:DeleteListener
- elasticloadbalancing:DeleteLoadBalancer
- elasticloadbalancing:DeleteLoadBalancerListeners
- elasticloadbalancing:DeleteTargetGroup
- elasticloadbalancing:DeregisterInstancesFromLoadBalancer
- elasticloadbalancing:DeregisterTargets
- elasticloadbalancing:DescribeListeners
- elasticloadbalancing:DescribeLoadBalancerAttributes
- elasticloadbalancing:DescribeLoadBalancerPolicies
- elasticloadbalancing:DescribeLoadBalancers
- elasticloadbalancing:DescribeTargetGroupAttributes
- elasticloadbalancing:DescribeTargetGroups
- elasticloadbalancing:DescribeTargetHealth
- elasticloadbalancing:DetachLoadBalancerFromSubnets
- elasticloadbalancing:ModifyListener
- elasticloadbalancing:ModifyLoadBalancerAttributes
- elasticloadbalancing:ModifyTargetGroup
- elasticloadbalancing:ModifyTargetGroupAttributes
- elasticloadbalancing:RegisterInstancesWithLoadBalancer
- elasticloadbalancing:RegisterTargets
- elasticloadbalancing:SetLoadBalancerPoliciesForBackendServer
- elasticloadbalancing:SetLoadBalancerPoliciesOfListener
- elasticloadbalancing:*
- kms:CreateGrant
- kms:GenerateDataKeyWithoutPlaintext
- kms:DescribeKey
# SSM Permissions for AmazonSSMManagedInstanceCore policy applied to the NodeInstanceRole
- ssm:DescribeAssociation
- ssm:GetDeployablePatchSnapshotForInstance
- ssm:GetDocument
- ssm:DescribeDocument
- ssm:GetManifest
- ssm:GetParameter
- ssm:GetParameters
- ssm:ListAssociations
- ssm:ListInstanceAssociations
- ssm:PutInventory
- ssm:PutComplianceItems
- ssm:PutConfigurePackageResult
- ssm:UpdateAssociationStatus
- ssm:UpdateInstanceAssociationStatus
- ssm:UpdateInstanceInformation
- ssm:*
# SSM Permissions for AmazonSSMManagedInstanceCore policy applied to the NodeInstanceRole
- ssmmessages:CreateControlChannel
- ssmmessages:CreateDataChannel
- ssmmessages:OpenControlChannel
- ssmmessages:OpenDataChannel
- ssmmessages:*
# SSM Permissions for AmazonSSMManagedInstanceCore policy applied to the NodeInstanceRole
- ec2messages:AcknowledgeMessage
- ec2messages:DeleteMessage
- ec2messages:FailMessage
- ec2messages:GetEndpoint
- ec2messages:GetMessages
- ec2messages:SendReply
- ec2messages:*
- sqs:DeleteMessage
- sqs:GetQueueAttributes
- sqs:GetQueueUrl
- sqs:SendMessage
- sqs:ReceiveMessage
- pricing:GetProducts
- ec2:DescribeSpotPriceHistory
- eks:DescribeCluster
Resource: "*"
- Effect: Allow
Action: iam:PassRole
Resource:
- !Sub "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/KarpenterNodeRole-*"
- !GetAtt FISInterruptionRole.Arn
- Effect: Allow
Action: iam:CreateInstanceProfile
Resource: "*"
Condition:
StringLike:
aws:RequestTag/karpenter.k8s.aws/ec2nodeclass: "*"
- Effect: Allow
Action: iam:TagInstanceProfile
Resource: "*"
Condition:
StringLike:
aws:RequestTag/karpenter.k8s.aws/ec2nodeclass: "*"
aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass: "*"
- Effect: Allow
Action:
- iam:AddRoleToInstanceProfile
- iam:RemoveRoleFromInstanceProfile
- iam:DeleteInstanceProfile
Resource: "*"
Condition:
StringLike:
aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass: "*"
- Effect: Allow
Action: iam:GetInstanceProfile
Resource: "*"
- Effect: Allow
Action:
- aps:RemoteWrite
Expand Down
4 changes: 4 additions & 0 deletions test/pkg/environment/aws/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ import (

const WindowsDefaultImage = "mcr.microsoft.com/oss/kubernetes/pause:3.9"

// ExcludedInstanceFamilies denotes instance families that have issues during resource registration due to compatibility
// issues with versions of the VPC Resource Controller
var ExcludedInstanceFamilies = []string{"m7a", "r7a", "c7a", "r7i"}

type Environment struct {
*common.Environment
Region string
Expand Down
9 changes: 9 additions & 0 deletions test/pkg/environment/aws/expectations.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@ import (
"github.com/aws/aws-sdk-go/service/iam"
"github.com/aws/aws-sdk-go/service/ssm"
"github.com/aws/aws-sdk-go/service/sts"
"github.com/mitchellh/hashstructure/v2"
. "github.com/onsi/ginkgo/v2" //nolint:revive,stylecheck
. "github.com/onsi/gomega" //nolint:revive,stylecheck
"github.com/samber/lo"
"go.uber.org/multierr"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"

"github.com/aws/karpenter/pkg/apis/v1beta1"
)

// Spot Interruption experiment details partially copied from
Expand Down Expand Up @@ -121,6 +124,12 @@ func (env *Environment) ExpectInstanceProfileExists(profileName string) iam.Inst
return lo.FromPtr(out.InstanceProfile)
}

// GetInstanceProfileName returns the instance profile name used for the given NodeClass. The name is the
// cluster name, an underscore, and a numeric hash of the region concatenated with the NodeClass name.
// Hashing the region and NodeClass name (rather than embedding them verbatim) is intended to keep the result
// within the 128-character instance profile name limit.
func (env *Environment) GetInstanceProfileName(nodeClass *v1beta1.EC2NodeClass) string {
	// The hash input is simply "<region><nodeclass name>" with no separator.
	hashInput := env.Region + nodeClass.Name
	// lo.Must panics if hashing fails, so a hashing error surfaces as a panic rather than a returned error.
	digest := lo.Must(hashstructure.Hash(hashInput, hashstructure.FormatV2, nil))
	return fmt.Sprintf("%s_%d", env.ClusterName, digest)
}

func (env *Environment) GetInstance(nodeName string) ec2.Instance {
node := env.Environment.GetNode(nodeName)
return env.GetInstanceByID(env.ExpectParsedProviderID(node.Spec.ProviderID))
Expand Down
4 changes: 3 additions & 1 deletion test/pkg/environment/common/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"github.com/samber/lo"

"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/apis/v1beta1"
"github.com/aws/karpenter-core/pkg/utils/resources"
)

Expand Down Expand Up @@ -223,7 +224,8 @@ func (m *Monitor) nodeUtilization(resource v1.ResourceName) []float64 {
for nodeName, requests := range st.nodeRequests {
allocatable := st.nodes[nodeName].Status.Allocatable[resource]
// skip any nodes we didn't launch
if _, ok := st.nodes[nodeName].Labels[v1alpha5.ProvisionerNameLabelKey]; !ok {
if st.nodes[nodeName].Labels[v1alpha5.ProvisionerNameLabelKey] == "" &&
st.nodes[nodeName].Labels[v1beta1.NodePoolLabelKey] == "" {
continue
}
if allocatable.IsZero() {
Expand Down
18 changes: 5 additions & 13 deletions test/suites/alpha/drift/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ var _ = Describe("Drift", Label("AWS"), func() {
},
})
env.ExpectSettingsOverriddenLegacy(map[string]string{"featureGates.driftEnabled": "true"})
env.ExpectSettingsOverridden(v1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=true"})
})
It("should deprovision nodes that have drifted due to AMIs", func() {
// choose an old static image
Expand Down Expand Up @@ -122,7 +121,6 @@ var _ = Describe("Drift", Label("AWS"), func() {
})
It("should not deprovision nodes that have drifted without the featureGate enabled", func() {
env.ExpectSettingsOverriddenLegacy(map[string]string{"featureGates.driftEnabled": "false"})
env.ExpectSettingsOverridden(v1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=false"})
// choose an old static image
parameter, err := env.SSMAPI.GetParameter(&ssm.GetParameterInput{
Name: awssdk.String("/aws/service/eks/optimized-ami/1.23/amazon-linux-2/amazon-eks-node-1.23-v20230322/image_id"),
Expand Down Expand Up @@ -377,14 +375,11 @@ var _ = Describe("Drift", Label("AWS"), func() {
}).Should(Succeed())

// Expect nodes To get cordoned
cordonedNodes := env.EventuallyExpectCordonedNodeCount("==", 1)
cordonedNodes := env.EventuallyExpectCordonedNodeCountLegacy("==", 1)

// Drift should fail and the original node should be uncordoned
// TODO: reduce timeouts when deprovisioning waits are factored out
Eventually(func(g Gomega) {
g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(cordonedNodes[0]), cordonedNodes[0]))
g.Expect(cordonedNodes[0].Spec.Unschedulable).To(BeFalse())
}).WithTimeout(11 * time.Minute).Should(Succeed())
env.EventuallyExpectNodesUncordonedLegacyWithTimeout(11*time.Minute, cordonedNodes...)

Eventually(func(g Gomega) {
machines := &v1alpha5.MachineList{}
Expand Down Expand Up @@ -435,14 +430,11 @@ var _ = Describe("Drift", Label("AWS"), func() {
}).Should(Succeed())

// Expect nodes To be cordoned
cordonedNodes := env.EventuallyExpectCordonedNodeCount("==", 1)
cordonedNodes := env.EventuallyExpectCordonedNodeCountLegacy("==", 1)

// Drift should fail and original node should be uncordoned
// TODO: reduce timeouts when deprovisioning waits are factored outr
Eventually(func(g Gomega) {
g.Expect(env.Client.Get(env, client.ObjectKeyFromObject(cordonedNodes[0]), cordonedNodes[0]))
g.Expect(cordonedNodes[0].Spec.Unschedulable).To(BeFalse())
}).WithTimeout(12 * time.Minute).Should(Succeed())
// TODO: reduce timeouts when deprovisioning waits are factored out
env.EventuallyExpectNodesUncordonedLegacyWithTimeout(11*time.Minute, cordonedNodes...)

// Expect that the new machine/node is kept around after the un-cordon
nodeList := &v1.NodeList{}
Expand Down
5 changes: 2 additions & 3 deletions test/suites/alpha/expiration/expiration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ var _ = Describe("Expiration", func() {
TTLSecondsUntilExpired: ptr.Int64(30),
})
env.ExpectSettingsOverriddenLegacy(map[string]string{"featureGates.driftEnabled": "false"})
env.ExpectSettingsOverridden(v1.EnvVar{Name: "FEATURE_GATES", Value: "Drift=true"})
})
It("should expire the node after the TTLSecondsUntilExpired is reached", func() {
var numPods int32 = 1
Expand Down Expand Up @@ -238,7 +237,7 @@ var _ = Describe("Expiration", func() {
}).Should(Succeed())

// Expect nodes To get cordoned
cordonedNodes := env.EventuallyExpectCordonedNodeCount("==", 1)
cordonedNodes := env.EventuallyExpectCordonedNodeCountLegacy("==", 1)

// Expire should fail and the original node should be uncordoned
// TODO: reduce timeouts when deprovisioning waits are factored out
Expand Down Expand Up @@ -306,7 +305,7 @@ var _ = Describe("Expiration", func() {
}).Should(Succeed())

// Expect nodes To be cordoned
cordonedNodes := env.EventuallyExpectCordonedNodeCount("==", 1)
cordonedNodes := env.EventuallyExpectCordonedNodeCountLegacy("==", 1)

// Expire should fail and original node should be uncordoned and no machines should be removed
// TODO: reduce timeouts when deprovisioning waits are factored out
Expand Down
2 changes: 1 addition & 1 deletion test/suites/alpha/integration/ami_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,7 @@ var _ = Describe("AMI", func() {
{
Key: v1alpha1.LabelInstanceFamily,
Operator: v1.NodeSelectorOpNotIn,
Values: []string{"m7a", "r7a", "c7a"},
Values: awsenv.ExcludedInstanceFamilies,
},
{
Key: v1alpha1.LabelInstanceCategory,
Expand Down
3 changes: 2 additions & 1 deletion test/suites/alpha/integration/extended_resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/test"
"github.com/aws/karpenter/pkg/apis/v1alpha1"
awsenv "github.com/aws/karpenter/test/pkg/environment/aws"

awstest "github.com/aws/karpenter/pkg/test"
)
Expand Down Expand Up @@ -135,7 +136,7 @@ var _ = Describe("Extended Resources", func() {
{
Key: v1alpha1.LabelInstanceFamily,
Operator: v1.NodeSelectorOpNotIn,
Values: []string{"m7a", "r7a"},
Values: awsenv.ExcludedInstanceFamilies,
},
},
})
Expand Down
Loading