diff --git a/cmd/controller/main.go b/cmd/controller/main.go index 501149c3b431..9a51e4537aef 100644 --- a/cmd/controller/main.go +++ b/cmd/controller/main.go @@ -44,6 +44,7 @@ func main() { op. WithControllers(ctx, corecontrollers.NewControllers( + op.Manager, op.Clock, op.GetClient(), op.EventRecorder, @@ -52,6 +53,7 @@ func main() { WithWebhooks(ctx, corewebhooks.NewWebhooks()...). WithControllers(ctx, controllers.NewControllers( ctx, + op.Manager, op.Session, op.Clock, op.GetClient(), diff --git a/go.mod b/go.mod index d5662bb2b9b0..4cda3bc733d9 100644 --- a/go.mod +++ b/go.mod @@ -31,7 +31,7 @@ require ( k8s.io/utils v0.0.0-20240102154912-e7106e64919e knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd sigs.k8s.io/controller-runtime v0.18.4 - sigs.k8s.io/karpenter v0.37.1-0.20240708235259-bcd33e924905 + sigs.k8s.io/karpenter v0.37.1-0.20240710172318-86056e48b9ac sigs.k8s.io/yaml v1.4.0 ) diff --git a/go.sum b/go.sum index 6a77ecf659d0..b53638d5c635 100644 --- a/go.sum +++ b/go.sum @@ -763,8 +763,8 @@ sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHv sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= -sigs.k8s.io/karpenter v0.37.1-0.20240708235259-bcd33e924905 h1:4ulecMwrSJl9EunKwfDGBhY0i3vJOYmI1GWs/JzsrjQ= -sigs.k8s.io/karpenter v0.37.1-0.20240708235259-bcd33e924905/go.mod h1:eqRbKU0hmncoJXhh+MI8sCLYTjKDvoVPzo+myhDjvUI= +sigs.k8s.io/karpenter v0.37.1-0.20240710172318-86056e48b9ac h1:8nfNoKGJSAzTMKxweI4DcTADPyMY/oCW2x1qgx3gUVY= +sigs.k8s.io/karpenter v0.37.1-0.20240710172318-86056e48b9ac/go.mod h1:jwEZ2Efxsc0yyNkrDEFN2RduAwlm/s7reIVNblZ8vyM= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod 
h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/hack/docgen.sh b/hack/docgen.sh index 0f44d6c5161d..e868e2607214 100755 --- a/hack/docgen.sh +++ b/hack/docgen.sh @@ -13,7 +13,7 @@ AWS_SDK_GO_PROMETHEUS_DIR=$(go list -m -f '{{ .Dir }}' github.com/jonathan-innis OPERATORPKG_DIR=$(go list -m -f '{{ .Dir }}' github.com/awslabs/operatorpkg) compatibilitymatrix -go run hack/docs/metrics_gen/main.go pkg/ "${KARPENTER_CORE_DIR}/pkg" "${CONTROLLER_RUNTIME_DIR}/pkg" "${AWS_SDK_GO_PROMETHEUS_DIR}" "${OPERATORPKG_DIR}/metrics" website/content/en/preview/reference/metrics.md +go run hack/docs/metrics_gen/main.go pkg/ "${KARPENTER_CORE_DIR}/pkg" "${CONTROLLER_RUNTIME_DIR}/pkg" "${AWS_SDK_GO_PROMETHEUS_DIR}" "${OPERATORPKG_DIR}" website/content/en/preview/reference/metrics.md go run hack/docs/instancetypes_gen/main.go website/content/en/preview/reference/instance-types.md go run hack/docs/configuration_gen/main.go website/content/en/preview/reference/settings.md cd charts/karpenter && helm-docs diff --git a/hack/docs/metrics_gen/main.go b/hack/docs/metrics_gen/main.go index 00c779bcc0b7..b2fac43c1bcb 100644 --- a/hack/docs/metrics_gen/main.go +++ b/hack/docs/metrics_gen/main.go @@ -176,15 +176,16 @@ func bySubsystem(metrics []metricInfo) func(i int, j int) bool { // Higher ordering comes first. If a value isn't designated here then the subsystem will be given a default of 0. 
// Metrics without a subsystem come first since there is no designation for the bucket they fall under subSystemSortOrder := map[string]int{ - "": 100, - "nodepool": 10, - "nodeclaims": 9, - "nodes": 8, - "pods": 7, - "workqueue": -1, - "client_go": -1, - "aws_sdk_go": -1, - "leader_election": -2, + "": 100, + "nodepool": 10, + "nodeclaims": 9, + "nodes": 8, + "pods": 7, + "status_condition": -1, + "workqueue": -1, + "client_go": -1, + "aws_sdk_go": -1, + "leader_election": -2, } return func(i, j int) bool { @@ -314,6 +315,8 @@ func getIdentMapping(identName string) (string, error) { "metrics.Namespace": metrics.Namespace, "Namespace": metrics.Namespace, + "MetricNamespace": "operator", + "MetricSubsystem": "status_condition", "WorkQueueSubsystem": "workqueue", "DepthKey": "depth", "AddsKey": "adds_total", diff --git a/pkg/controllers/controllers.go b/pkg/controllers/controllers.go index b270658e95ef..dbeaf727a282 100644 --- a/pkg/controllers/controllers.go +++ b/pkg/controllers/controllers.go @@ -18,8 +18,11 @@ import ( "context" "github.com/awslabs/operatorpkg/controller" + "github.com/awslabs/operatorpkg/status" + "sigs.k8s.io/controller-runtime/pkg/manager" "sigs.k8s.io/karpenter/pkg/cloudprovider" + "github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1" nodeclasshash "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/hash" nodeclassstatus "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/status" nodeclasstermination "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/termination" @@ -50,7 +53,7 @@ import ( "github.com/aws/karpenter-provider-aws/pkg/providers/subnet" ) -func NewControllers(ctx context.Context, sess *session.Session, clk clock.Clock, kubeClient client.Client, recorder events.Recorder, +func NewControllers(ctx context.Context, mgr manager.Manager, sess *session.Session, clk clock.Clock, kubeClient client.Client, recorder events.Recorder, unavailableOfferings *cache.UnavailableOfferings, cloudProvider 
cloudprovider.CloudProvider, subnetProvider subnet.Provider, securityGroupProvider securitygroup.Provider, instanceProfileProvider instanceprofile.Provider, instanceProvider instance.Provider, pricingProvider pricing.Provider, amiProvider amifamily.Provider, launchTemplateProvider launchtemplate.Provider, instanceTypeProvider instancetype.Provider) []controller.Controller { @@ -63,6 +66,7 @@ func NewControllers(ctx context.Context, sess *session.Session, clk clock.Clock, nodeclaimtagging.NewController(kubeClient, instanceProvider), controllerspricing.NewController(pricingProvider), controllersinstancetype.NewController(instanceTypeProvider), + status.NewController[*v1beta1.EC2NodeClass](kubeClient, mgr.GetEventRecorderFor("karpenter")), } if options.FromContext(ctx).InterruptionQueue != "" { sqsapi := servicesqs.New(sess) diff --git a/website/content/en/preview/reference/metrics.md b/website/content/en/preview/reference/metrics.md index e8da1a2d33c0..9cc551405eba 100644 --- a/website/content/en/preview/reference/metrics.md +++ b/website/content/en/preview/reference/metrics.md @@ -232,6 +232,14 @@ Current depth of workqueue ### `workqueue_adds_total` Total number of adds handled by workqueue +## Status Condition Metrics + +### `operator_status_condition_transition_seconds` +The amount of time a condition was in a given state before transitioning. e.g. Alarm := P99(Updated=False) > 5 minutes + +### `operator_status_condition_count` +The number of conditions for a given object, type and status. e.g. Alarm := Available=False > 0 + ## Client Go Metrics ### `client_go_request_total`