diff --git a/kubetest2/internal/deployers/eksapi/deployer.go b/kubetest2/internal/deployers/eksapi/deployer.go index d1adbd402..8c854cb92 100644 --- a/kubetest2/internal/deployers/eksapi/deployer.go +++ b/kubetest2/internal/deployers/eksapi/deployer.go @@ -3,7 +3,6 @@ package eksapi import ( "flag" "fmt" - "path" "path/filepath" "time" @@ -13,7 +12,6 @@ import ( "github.com/aws/aws-k8s-tester/kubetest2/internal/util" "github.com/aws/aws-sdk-go-v2/service/cloudwatch" - cloudwatchtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" ekstypes "github.com/aws/aws-sdk-go-v2/service/eks/types" "github.com/octago/sflags/gen/gpflag" "github.com/spf13/pflag" @@ -29,16 +27,6 @@ const ResourcePrefix = "kubetest2-" + DeployerName var SupportedNodeNameStrategy = []string{"SessionName", "EC2PrivateDNSName"} -var DeployerMetricNamespace = path.Join("kubetest2", DeployerName) - -var ( - totalRuntimeSeconds = &metrics.MetricSpec{ - Namespace: DeployerMetricNamespace, - Metric: "TotalRuntimeSeconds", - Unit: cloudwatchtypes.StandardUnitSeconds, - } -) - // assert that deployer implements optional interfaces var _ types.DeployerWithKubeconfig = &deployer{} var _ types.DeployerWithInit = &deployer{} @@ -54,6 +42,8 @@ type deployer struct { addonManager *AddonManager nodegroupManager *NodegroupManager + awsClients *awsClients + infra *Infrastructure cluster *Cluster @@ -113,7 +103,7 @@ func (d *deployer) Version() string { func (d *deployer) Init() error { d.initTime = time.Now() awsConfig := awssdk.NewConfig() - awsClients := newAWSClients(awsConfig, d.EKSEndpointURL) + d.awsClients = newAWSClients(awsConfig, d.EKSEndpointURL) resourceID := ResourcePrefix + "-" + d.commonOptions.RunID() if d.deployerOptions.EmitMetrics { client := cloudwatch.NewFromConfig(awsConfig) @@ -121,10 +111,10 @@ func (d *deployer) Init() error { } else { d.metrics = metrics.NewNoopMetricRegistry() } - d.infraManager = NewInfrastructureManager(awsClients, resourceID, d.metrics) - d.clusterManager = 
NewClusterManager(awsClients, resourceID) - d.addonManager = NewAddonManager(awsClients) - d.nodegroupManager = NewNodegroupManager(awsClients, resourceID) + d.infraManager = NewInfrastructureManager(d.awsClients, resourceID, d.metrics) + d.clusterManager = NewClusterManager(d.awsClients, resourceID) + d.addonManager = NewAddonManager(d.awsClients) + d.nodegroupManager = NewNodegroupManager(d.awsClients, resourceID) return nil } @@ -205,6 +195,11 @@ func (d *deployer) Up() error { if err := waitForReadyNodes(k8sClient, d.Nodes, d.NodeReadyTimeout); err != nil { return err } + if d.EmitMetrics { + if err := emitNodeMetrics(d.metrics, k8sClient, d.awsClients.EC2()); err != nil { + return err + } + } return nil } diff --git a/kubetest2/internal/deployers/eksapi/k8s.go b/kubetest2/internal/deployers/eksapi/k8s.go index 55bc4ac02..655c678a4 100644 --- a/kubetest2/internal/deployers/eksapi/k8s.go +++ b/kubetest2/internal/deployers/eksapi/k8s.go @@ -3,8 +3,12 @@ package eksapi import ( "context" "fmt" + "net/url" + "strings" "time" + "github.com/aws/aws-k8s-tester/kubetest2/internal/metrics" + "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/pkg/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" v1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -33,10 +37,11 @@ func waitForReadyNodes(client *kubernetes.Clientset, nodeCount int, timeout time return errors.Wrap(err, "creating node watcher") } defer watcher.Stop() - counter, err := getReadyNodes(client) + initialReadyNodes, err := getReadyNodes(client) if err != nil { return errors.Wrap(err, "listing nodes") } + counter := len(initialReadyNodes) ctx, _ := context.WithTimeout(context.Background(), timeout) for { select { @@ -70,27 +75,35 @@ func waitForReadyNodes(client *kubernetes.Clientset, nodeCount int, timeout time return nil } -func getReadyNodes(client kubernetes.Interface) (int, error) { +func getReadyNodes(client kubernetes.Interface) ([]corev1.Node, error) { nodes, err := 
client.CoreV1().Nodes().List(context.TODO(), v1.ListOptions{}) if err != nil { - return 0, err + return nil, err } - counter := 0 + var readyNodes []corev1.Node for _, node := range nodes.Items { if isNodeReady(&node) { - counter++ + readyNodes = append(readyNodes, node) } } - return counter, nil + return readyNodes, nil } func isNodeReady(node *corev1.Node) bool { + c := getNodeReadyCondition(node) + if c == nil { + return false + } + return c.Status == corev1.ConditionTrue +} + +func getNodeReadyCondition(node *corev1.Node) *corev1.NodeCondition { for _, c := range node.Status.Conditions { - if c.Type == corev1.NodeReady && c.Status == corev1.ConditionTrue { - return true + if c.Type == corev1.NodeReady { + return &c } } - return false + return nil } func createAWSAuthConfigMap(client *kubernetes.Clientset, nodeNameStrategy string, nodeRoleARN string) error { @@ -110,3 +123,64 @@ func createAWSAuthConfigMap(client *kubernetes.Clientset, nodeNameStrategy strin }, metav1.CreateOptions{}) return err } + +func emitNodeMetrics(metricRegistry metrics.MetricRegistry, k8sClient *kubernetes.Clientset, ec2Client *ec2.Client) error { + nodes, err := getReadyNodes(k8sClient) + if err != nil { + return err + } + var errs []error + for _, node := range nodes { + providerId, err := parseKubernetesProviderID(node.Spec.ProviderID) + if err != nil { + errs = append(errs, err) + continue + } + instanceInfo, err := ec2Client.DescribeInstances(context.TODO(), &ec2.DescribeInstancesInput{ + InstanceIds: []string{providerId.InstanceID}, + }) + if err != nil { + errs = append(errs, err) + continue + } + launchTime := *instanceInfo.Reservations[0].Instances[0].LaunchTime + timeToRegistration := node.ObjectMeta.CreationTimestamp.Time.Sub(launchTime) + timeToReady := getNodeReadyCondition(&node).LastTransitionTime.Time.Sub(launchTime) + + nodeDimensions := map[string]string{ + "instanceType": node.ObjectMeta.Labels[corev1.LabelInstanceTypeStable], + "os": 
node.ObjectMeta.Labels[corev1.LabelOSStable], + "arch": node.ObjectMeta.Labels[corev1.LabelArchStable], + } + + metricRegistry.Record(nodeTimeToRegistrationSeconds, timeToRegistration.Seconds(), nodeDimensions) + metricRegistry.Record(nodeTimeToReadySeconds, timeToReady.Seconds(), nodeDimensions) + } + return nil +} + +type KubernetesProviderID struct { + AvailabilityZone string + InstanceID string +} + +func parseKubernetesProviderID(rawProviderId string) (*KubernetesProviderID, error) { + url, err := url.Parse(rawProviderId) + if err != nil { + return nil, fmt.Errorf("malformed provider ID: %s", rawProviderId) + } + if url.Scheme != "aws" { + return nil, fmt.Errorf("unsupported provider ID scheme: %s", url.Scheme) + } + if url.Path == "" { + return nil, fmt.Errorf("provider ID path is empty: %s", rawProviderId) + } + parts := strings.Split(strings.TrimPrefix(url.Path, "/"), "/") + if len(parts) != 2 { + return nil, fmt.Errorf("provider ID path does not have 2 parts: %s", url.Path) + } + return &KubernetesProviderID{ + AvailabilityZone: parts[0], + InstanceID: parts[1], + }, nil +} diff --git a/kubetest2/internal/deployers/eksapi/metrics.go b/kubetest2/internal/deployers/eksapi/metrics.go new file mode 100644 index 000000000..ace0e1639 --- /dev/null +++ b/kubetest2/internal/deployers/eksapi/metrics.go @@ -0,0 +1,30 @@ +package eksapi + +import ( + "path" + + "github.com/aws/aws-k8s-tester/kubetest2/internal/metrics" + cloudwatchtypes "github.com/aws/aws-sdk-go-v2/service/cloudwatch/types" +) + +var DeployerMetricNamespace = path.Join("kubetest2", DeployerName) + +var ( + totalRuntimeSeconds = &metrics.MetricSpec{ + Namespace: DeployerMetricNamespace, + Metric: "TotalRuntimeSeconds", + Unit: cloudwatchtypes.StandardUnitSeconds, + } + + nodeTimeToRegistrationSeconds = &metrics.MetricSpec{ + Namespace: DeployerMetricNamespace, + Metric: "NodeTimeToRegistrationSeconds", + Unit: cloudwatchtypes.StandardUnitSeconds, + } + + nodeTimeToReadySeconds = &metrics.MetricSpec{ + Namespace:
DeployerMetricNamespace, + Metric: "NodeTimeToReadySeconds", + Unit: cloudwatchtypes.StandardUnitSeconds, + } +)