diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 501149c3b431..9a51e4537aef 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -44,6 +44,7 @@ func main() { op. WithControllers(ctx, corecontrollers.NewControllers( + op.Manager, op.Clock, op.GetClient(), op.EventRecorder, @@ -52,6 +53,7 @@ func main() { WithWebhooks(ctx, corewebhooks.NewWebhooks()...). WithControllers(ctx, controllers.NewControllers( ctx, + op.Manager, op.Session, op.Clock, op.GetClient(), diff --git a/go.mod b/go.mod index d5662bb2b9b0..4cda3bc733d9 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( k8s.io/utils v0.0.0-20240102154912-e7106e64919e knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd sigs.k8s.io/controller-runtime v0.18.4 - sigs.k8s.io/karpenter v0.37.1-0.20240708235259-bcd33e924905 + sigs.k8s.io/karpenter v0.37.1-0.20240710172318-86056e48b9ac sigs.k8s.io/yaml v1.4.0 ) diff --git a/go.sum b/go.sum index 6a77ecf659d0..b53638d5c635 100644 --- a/go.sum +++ b/go.sum @@ -763,8 +763,8 @@ sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHv sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/karpenter v0.37.1-0.20240708235259-bcd33e924905 h1:4ulecMwrSJl9EunKwfDGBhY0i3vJOYmI1GWs/JzsrjQ= -sigs.k8s.io/karpenter v0.37.1-0.20240708235259-bcd33e924905/go.mod h1:eqRbKU0hmncoJXhh+MI8sCLYTjKDvoVPzo+myhDjvUI= +sigs.k8s.io/karpenter v0.37.1-0.20240710172318-86056e48b9ac h1:8nfNoKGJSAzTMKxweI4DcTADPyMY/oCW2x1qgx3gUVY= +sigs.k8s.io/karpenter v0.37.1-0.20240710172318-86056e48b9ac/go.mod h1:jwEZ2Efxsc0yyNkrDEFN2RduAwlm/s7reIVNblZ8vyM= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod 
h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/hack/docgen.sh b/hack/docgen.sh index eef3f5ac2b61..71ab2fa13f26 100755 --- a/hack/docgen.sh +++ b/hack/docgen.sh @@ -13,7 +13,7 @@ AWS_SDK_GO_PROMETHEUS_DIR=$(go list -m -f '{{ .Dir }}' github.com/jonathan-innis OPERATORPKG_DIR=$(go list -m -f '{{ .Dir }}' github.com/awslabs/operatorpkg) compatibilitymatrix -go run hack/docs/metrics_gen_docs.go pkg/ "${KARPENTER_CORE_DIR}/pkg" "${CONTROLLER_RUNTIME_DIR}/pkg" "${AWS_SDK_GO_PROMETHEUS_DIR}" "${OPERATORPKG_DIR}/metrics" website/content/en/preview/reference/metrics.md +go run hack/docs/metrics_gen_docs.go pkg/ "${KARPENTER_CORE_DIR}/pkg" "${CONTROLLER_RUNTIME_DIR}/pkg" "${AWS_SDK_GO_PROMETHEUS_DIR}" "${OPERATORPKG_DIR}" website/content/en/preview/reference/metrics.md go run hack/docs/instancetypes_gen_docs.go website/content/en/preview/reference/instance-types.md go run hack/docs/configuration_gen_docs.go website/content/en/preview/reference/settings.md cd charts/karpenter && helm-docs diff --git a/hack/docs/metrics_gen_docs.go b/hack/docs/metrics_gen_docs.go index 00c779bcc0b7..b2fac43c1bcb 100644 --- a/hack/docs/metrics_gen_docs.go +++ b/hack/docs/metrics_gen_docs.go @@ -176,15 +176,16 @@ func bySubsystem(metrics []metricInfo) func(i int, j int) bool { // Higher ordering comes first. If a value isn't designated here then the subsystem will be given a default of 0. 
// Metrics without a subsystem come first since there is no designation for the bucket they fall under subSystemSortOrder := map[string]int{ - "": 100, - "nodepool": 10, - "nodeclaims": 9, - "nodes": 8, - "pods": 7, - "workqueue": -1, - "client_go": -1, - "aws_sdk_go": -1, - "leader_election": -2, + "": 100, + "nodepool": 10, + "nodeclaims": 9, + "nodes": 8, + "pods": 7, + "status_condition": -1, + "workqueue": -1, + "client_go": -1, + "aws_sdk_go": -1, + "leader_election": -2, } return func(i, j int) bool { @@ -314,6 +315,8 @@ func getIdentMapping(identName string) (string, error) { "metrics.Namespace": metrics.Namespace, "Namespace": metrics.Namespace, + "MetricNamespace": "operator", + "MetricSubsystem": "status_condition", "WorkQueueSubsystem": "workqueue", "DepthKey": "depth", "AddsKey": "adds_total", diff --git a/pkg/apis/v1/ec2nodeclass_conversion.go b/pkg/apis/v1/ec2nodeclass_conversion.go index f53902d57ca0..15518f8ed381 100644 --- a/pkg/apis/v1/ec2nodeclass_conversion.go +++ b/pkg/apis/v1/ec2nodeclass_conversion.go @@ -17,9 +17,10 @@ package v1 import ( "context" - "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/samber/lo" "knative.dev/pkg/apis" + + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" ) func (in *EC2NodeClass) ConvertTo(ctx context.Context, to apis.Convertible) error { diff --git a/pkg/apis/v1/suite_test.go b/pkg/apis/v1/suite_test.go index f75223a7d204..79375cd7cf85 100644 --- a/pkg/apis/v1/suite_test.go +++ b/pkg/apis/v1/suite_test.go @@ -16,9 +16,10 @@ package v1_test import ( "context" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "testing" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" . 
"knative.dev/pkg/logging/testing" diff --git a/pkg/apis/v1beta1/suite_test.go b/pkg/apis/v1beta1/suite_test.go index 547a9b548cb9..5ec90893e7c1 100644 --- a/pkg/apis/v1beta1/suite_test.go +++ b/pkg/apis/v1beta1/suite_test.go @@ -16,9 +16,10 @@ package v1beta1_test import ( "context" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "testing" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index b270658e95ef..dbeaf727a282 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -18,8 +18,11 @@ import ( "context" "github.com/awslabs/operatorpkg/controller" + "github.com/awslabs/operatorpkg/status" + "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/karpenter/pkg/cloudprovider" + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" nodeclasshash "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/hash" nodeclassstatus "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/status" nodeclasstermination "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/termination" @@ -50,7 +53,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" ) -func NewControllers(ctx context.Context, sess *session.Session, clk clock.Clock, kubeClient client.Client, recorder events.Recorder, +func NewControllers(ctx context.Context, mgr manager.Manager, sess *session.Session, clk clock.Clock, kubeClient client.Client, recorder events.Recorder, unavailableOfferings *cache.UnavailableOfferings, cloudProvider cloudprovider.CloudProvider, subnetProvider subnet.Provider, securityGroupProvider securitygroup.Provider, instanceProfileProvider instanceprofile.Provider, instanceProvider instance.Provider, pricingProvider pricing.Provider, amiProvider amifamily.Provider, launchTemplateProvider launchtemplate.Provider, instanceTypeProvider instancetype.Provider) []controller.Controller { @@ 
-63,6 +66,7 @@ func NewControllers(ctx context.Context, sess *session.Session, clk clock.Clock, nodeclaimtagging.NewController(kubeClient, instanceProvider), controllerspricing.NewController(pricingProvider), controllersinstancetype.NewController(instanceTypeProvider), + status.NewController[*v1beta1.EC2NodeClass](kubeClient, mgr.GetEventRecorderFor("karpenter")), } if options.FromContext(ctx).InterruptionQueue != "" { sqsapi := servicesqs.New(sess) diff --git a/pkg/controllers/nodeclass/status/instanceprofile_test.go b/pkg/controllers/nodeclass/status/instanceprofile_test.go index 505d98768609..14c3317955cb 100644 --- a/pkg/controllers/nodeclass/status/instanceprofile_test.go +++ b/pkg/controllers/nodeclass/status/instanceprofile_test.go @@ -17,9 +17,10 @@ package status_test import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/iam" - "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/samber/lo" + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" + "github.com/aws/karpenter-provider-aws/pkg/fake" "github.com/aws/karpenter-provider-aws/pkg/operator/options" diff --git a/pkg/controllers/nodeclass/status/subnet_test.go b/pkg/controllers/nodeclass/status/subnet_test.go index b2e9e0c70535..01c4c90e4115 100644 --- a/pkg/controllers/nodeclass/status/subnet_test.go +++ b/pkg/controllers/nodeclass/status/subnet_test.go @@ -17,6 +17,7 @@ package status_test import ( "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "github.com/aws/karpenter-provider-aws/pkg/test" diff --git a/pkg/controllers/nodeclass/status/suite_test.go b/pkg/controllers/nodeclass/status/suite_test.go index 4c079269167f..3d780e426e04 100644 --- a/pkg/controllers/nodeclass/status/suite_test.go +++ b/pkg/controllers/nodeclass/status/suite_test.go @@ -16,9 +16,10 @@ package status_test import ( "context" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "testing" + 
"sigs.k8s.io/karpenter/pkg/test/v1alpha1" + coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" coretest "sigs.k8s.io/karpenter/pkg/test" diff --git a/pkg/controllers/nodeclass/termination/suite_test.go b/pkg/controllers/nodeclass/termination/suite_test.go index 8f89444a88e2..4fdb80b28c07 100644 --- a/pkg/controllers/nodeclass/termination/suite_test.go +++ b/pkg/controllers/nodeclass/termination/suite_test.go @@ -17,10 +17,11 @@ package termination_test import ( "context" "fmt" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "testing" "time" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/aws/aws-sdk-go/service/iam" diff --git a/pkg/controllers/providers/instancetype/suite_test.go b/pkg/controllers/providers/instancetype/suite_test.go index 536a1c8a8cc3..b0036518272a 100644 --- a/pkg/controllers/providers/instancetype/suite_test.go +++ b/pkg/controllers/providers/instancetype/suite_test.go @@ -16,9 +16,10 @@ package instancetype_test import ( "context" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "testing" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + v1 "k8s.io/api/core/v1" corev1beta1 "sigs.k8s.io/karpenter/pkg/apis/v1beta1" coreoptions "sigs.k8s.io/karpenter/pkg/operator/options" diff --git a/pkg/controllers/providers/pricing/suite_test.go b/pkg/controllers/providers/pricing/suite_test.go index 7ccbb6694dc1..86edd6c5eb4c 100644 --- a/pkg/controllers/providers/pricing/suite_test.go +++ b/pkg/controllers/providers/pricing/suite_test.go @@ -17,10 +17,11 @@ package pricing_test import ( "context" "fmt" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "testing" "time" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" awspricing "github.com/aws/aws-sdk-go/service/pricing" diff --git a/pkg/fake/cloudprovider.go b/pkg/fake/cloudprovider.go index 41c526798898..04e430a3a66c 100644 --- a/pkg/fake/cloudprovider.go +++ 
b/pkg/fake/cloudprovider.go @@ -16,14 +16,16 @@ package fake import ( "context" + "github.com/awslabs/operatorpkg/status" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - providerv1beta1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "sigs.k8s.io/karpenter/pkg/apis/v1beta1" corecloudprovider "sigs.k8s.io/karpenter/pkg/cloudprovider" "sigs.k8s.io/karpenter/pkg/test" + + providerv1beta1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" ) const ( diff --git a/pkg/operator/suite_test.go b/pkg/operator/suite_test.go index a7b4276ed4e1..13aed00a0fd2 100644 --- a/pkg/operator/suite_test.go +++ b/pkg/operator/suite_test.go @@ -17,9 +17,10 @@ package operator_test import ( "context" "errors" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "testing" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + "github.com/aws/aws-sdk-go/service/eks" "github.com/samber/lo" diff --git a/pkg/providers/amifamily/suite_test.go b/pkg/providers/amifamily/suite_test.go index 792ca4383e2a..13bb2506bfad 100644 --- a/pkg/providers/amifamily/suite_test.go +++ b/pkg/providers/amifamily/suite_test.go @@ -17,12 +17,13 @@ package amifamily_test import ( "context" "fmt" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "sort" "sync" "testing" "time" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" diff --git a/pkg/providers/instance/suite_test.go b/pkg/providers/instance/suite_test.go index ca5b9a75eb88..80b60b437009 100644 --- a/pkg/providers/instance/suite_test.go +++ b/pkg/providers/instance/suite_test.go @@ -17,10 +17,11 @@ package instance_test import ( "context" "fmt" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "testing" "time" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/awslabs/operatorpkg/object" diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 71a7614e566c..ad434cc85967 100644 --- 
a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -20,13 +20,14 @@ import ( "math" "net" "reflect" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "sort" "strings" "sync" "testing" "time" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/awslabs/operatorpkg/status" diff --git a/pkg/providers/launchtemplate/suite_test.go b/pkg/providers/launchtemplate/suite_test.go index 75408c00a83e..aac85d53a014 100644 --- a/pkg/providers/launchtemplate/suite_test.go +++ b/pkg/providers/launchtemplate/suite_test.go @@ -21,12 +21,13 @@ import ( "fmt" "net" "os" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "strconv" "strings" "testing" "time" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/ec2" diff --git a/pkg/providers/securitygroup/suite_test.go b/pkg/providers/securitygroup/suite_test.go index 696db69d83ac..8b414ce63f52 100644 --- a/pkg/providers/securitygroup/suite_test.go +++ b/pkg/providers/securitygroup/suite_test.go @@ -16,11 +16,12 @@ package securitygroup_test import ( "context" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "sort" "sync" "testing" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/samber/lo" diff --git a/pkg/providers/subnet/suite_test.go b/pkg/providers/subnet/suite_test.go index 43c484eed9e8..1ce1c48af5bb 100644 --- a/pkg/providers/subnet/suite_test.go +++ b/pkg/providers/subnet/suite_test.go @@ -16,11 +16,12 @@ package subnet_test import ( "context" - "sigs.k8s.io/karpenter/pkg/test/v1alpha1" "sort" "sync" "testing" + "sigs.k8s.io/karpenter/pkg/test/v1alpha1" + "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" "github.com/samber/lo" diff --git a/pkg/webhooks/webhooks.go b/pkg/webhooks/webhooks.go index 76d2d2f140b2..447a574ab4d5 100644 
--- a/pkg/webhooks/webhooks.go +++ b/pkg/webhooks/webhooks.go @@ -26,9 +26,10 @@ import ( "knative.dev/pkg/webhook/resourcesemantics/defaulting" "knative.dev/pkg/webhook/resourcesemantics/validation" + "github.com/awslabs/operatorpkg/object" + v1 "github.com/aws/karpenter-provider-aws/pkg/apis/v1" "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" - "github.com/awslabs/operatorpkg/object" ) var ( diff --git a/test/suites/expiration/suite_test.go b/test/suites/expiration/suite_test.go index 2dd5a95f2638..13095367a30a 100644 --- a/test/suites/expiration/suite_test.go +++ b/test/suites/expiration/suite_test.go @@ -93,7 +93,7 @@ var _ = Describe("Expiration", func() { env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after expiration // Set the expireAfter value to get the node deleted - nodePool.Spec.Disruption.ExpireAfter = corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Second*15)} + nodePool.Spec.Disruption.ExpireAfter = corev1beta1.NillableDuration{Duration: lo.ToPtr(time.Second * 15)} env.ExpectUpdated(nodePool) // Eventually the node will be tainted, which means its actively being disrupted @@ -149,7 +149,7 @@ var _ = Describe("Expiration", func() { env.Monitor.Reset() // Reset the monitor so that we can expect a single node to be spun up after expiration // Set the expireAfter value to get the node deleted - nodePool.Spec.Disruption.ExpireAfter.Duration = lo.ToPtr(time.Second*15) + nodePool.Spec.Disruption.ExpireAfter.Duration = lo.ToPtr(time.Second * 15) env.ExpectUpdated(nodePool) // Eventually the node will be tainted, which means its actively being disrupted diff --git a/test/suites/integration/instance_profile_test.go b/test/suites/integration/instance_profile_test.go index 68deef309752..e395a3dfb54d 100644 --- a/test/suites/integration/instance_profile_test.go +++ b/test/suites/integration/instance_profile_test.go @@ -16,9 +16,10 @@ package integration_test import ( "fmt" - 
"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" "time" + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" + "github.com/awslabs/operatorpkg/status" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/website/content/en/preview/reference/metrics.md b/website/content/en/preview/reference/metrics.md index e8da1a2d33c0..9cc551405eba 100644 --- a/website/content/en/preview/reference/metrics.md +++ b/website/content/en/preview/reference/metrics.md @@ -232,6 +232,14 @@ Current depth of workqueue ### `workqueue_adds_total` Total number of adds handled by workqueue +## Status Condition Metrics + +### `operator_status_condition_transition_seconds` +The amount of time a condition was in a given state before transitioning. e.g. Alarm := P99(Updated=False) > 5 minutes + +### `operator_status_condition_count` +The number of conditions for a given object, type and status. e.g. Alarm := Available=False > 0 + ## Client Go Metrics ### `client_go_request_total`