Skip to content

Commit

Permalink
chore: Add operator status condition metrics (#6482)
Browse files Browse the repository at this point in the history
Co-authored-by: Reed Schalo <[email protected]>
  • Loading branch information
jonathan-innis and rschalo authored Jul 11, 2024
1 parent f56bcdc commit 092e0ae
Show file tree
Hide file tree
Showing 7 changed files with 31 additions and 14 deletions.
2 changes: 2 additions & 0 deletions cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ func main() {

op.
WithControllers(ctx, corecontrollers.NewControllers(
op.Manager,
op.Clock,
op.GetClient(),
op.EventRecorder,
Expand All @@ -52,6 +53,7 @@ func main() {
WithWebhooks(ctx, corewebhooks.NewWebhooks()...).
WithControllers(ctx, controllers.NewControllers(
ctx,
op.Manager,
op.Session,
op.Clock,
op.GetClient(),
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ require (
k8s.io/utils v0.0.0-20240102154912-e7106e64919e
knative.dev/pkg v0.0.0-20231010144348-ca8c009405dd
sigs.k8s.io/controller-runtime v0.18.4
sigs.k8s.io/karpenter v0.37.1-0.20240708235259-bcd33e924905
sigs.k8s.io/karpenter v0.37.1-0.20240710172318-86056e48b9ac
sigs.k8s.io/yaml v1.4.0
)

Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -763,8 +763,8 @@ sigs.k8s.io/controller-runtime v0.18.4 h1:87+guW1zhvuPLh1PHybKdYFLU0YJp4FhJRmiHv
sigs.k8s.io/controller-runtime v0.18.4/go.mod h1:TVoGrfdpbA9VRFaRnKgk9P5/atA0pMwq+f+msb9M8Sg=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo=
sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0=
sigs.k8s.io/karpenter v0.37.1-0.20240708235259-bcd33e924905 h1:4ulecMwrSJl9EunKwfDGBhY0i3vJOYmI1GWs/JzsrjQ=
sigs.k8s.io/karpenter v0.37.1-0.20240708235259-bcd33e924905/go.mod h1:eqRbKU0hmncoJXhh+MI8sCLYTjKDvoVPzo+myhDjvUI=
sigs.k8s.io/karpenter v0.37.1-0.20240710172318-86056e48b9ac h1:8nfNoKGJSAzTMKxweI4DcTADPyMY/oCW2x1qgx3gUVY=
sigs.k8s.io/karpenter v0.37.1-0.20240710172318-86056e48b9ac/go.mod h1:jwEZ2Efxsc0yyNkrDEFN2RduAwlm/s7reIVNblZ8vyM=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4=
sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08=
sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E=
Expand Down
2 changes: 1 addition & 1 deletion hack/docgen.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ AWS_SDK_GO_PROMETHEUS_DIR=$(go list -m -f '{{ .Dir }}' github.com/jonathan-innis
OPERATORPKG_DIR=$(go list -m -f '{{ .Dir }}' github.com/awslabs/operatorpkg)

compatibilitymatrix
go run hack/docs/metrics_gen/main.go pkg/ "${KARPENTER_CORE_DIR}/pkg" "${CONTROLLER_RUNTIME_DIR}/pkg" "${AWS_SDK_GO_PROMETHEUS_DIR}" "${OPERATORPKG_DIR}/metrics" website/content/en/preview/reference/metrics.md
go run hack/docs/metrics_gen/main.go pkg/ "${KARPENTER_CORE_DIR}/pkg" "${CONTROLLER_RUNTIME_DIR}/pkg" "${AWS_SDK_GO_PROMETHEUS_DIR}" "${OPERATORPKG_DIR}" website/content/en/preview/reference/metrics.md
go run hack/docs/instancetypes_gen/main.go website/content/en/preview/reference/instance-types.md
go run hack/docs/configuration_gen/main.go website/content/en/preview/reference/settings.md
cd charts/karpenter && helm-docs
21 changes: 12 additions & 9 deletions hack/docs/metrics_gen/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,15 +176,16 @@ func bySubsystem(metrics []metricInfo) func(i int, j int) bool {
// Higher ordering comes first. If a value isn't designated here then the subsystem will be given a default of 0.
// Metrics without a subsystem come first since there is no designation for the bucket they fall under
subSystemSortOrder := map[string]int{
"": 100,
"nodepool": 10,
"nodeclaims": 9,
"nodes": 8,
"pods": 7,
"workqueue": -1,
"client_go": -1,
"aws_sdk_go": -1,
"leader_election": -2,
"": 100,
"nodepool": 10,
"nodeclaims": 9,
"nodes": 8,
"pods": 7,
"status_condition": -1,
"workqueue": -1,
"client_go": -1,
"aws_sdk_go": -1,
"leader_election": -2,
}

return func(i, j int) bool {
Expand Down Expand Up @@ -314,6 +315,8 @@ func getIdentMapping(identName string) (string, error) {
"metrics.Namespace": metrics.Namespace,
"Namespace": metrics.Namespace,

"MetricNamespace": "operator",
"MetricSubsystem": "status_condition",
"WorkQueueSubsystem": "workqueue",
"DepthKey": "depth",
"AddsKey": "adds_total",
Expand Down
6 changes: 5 additions & 1 deletion pkg/controllers/controllers.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ import (
"context"

"github.com/awslabs/operatorpkg/controller"
"github.com/awslabs/operatorpkg/status"
"sigs.k8s.io/controller-runtime/pkg/manager"
"sigs.k8s.io/karpenter/pkg/cloudprovider"

"github.com/aws/karpenter-provider-aws/pkg/apis/v1beta1"
nodeclasshash "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/hash"
nodeclassstatus "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/status"
nodeclasstermination "github.com/aws/karpenter-provider-aws/pkg/controllers/nodeclass/termination"
Expand Down Expand Up @@ -50,7 +53,7 @@ import (
"github.com/aws/karpenter-provider-aws/pkg/providers/subnet"
)

func NewControllers(ctx context.Context, sess *session.Session, clk clock.Clock, kubeClient client.Client, recorder events.Recorder,
func NewControllers(ctx context.Context, mgr manager.Manager, sess *session.Session, clk clock.Clock, kubeClient client.Client, recorder events.Recorder,
unavailableOfferings *cache.UnavailableOfferings, cloudProvider cloudprovider.CloudProvider, subnetProvider subnet.Provider,
securityGroupProvider securitygroup.Provider, instanceProfileProvider instanceprofile.Provider, instanceProvider instance.Provider,
pricingProvider pricing.Provider, amiProvider amifamily.Provider, launchTemplateProvider launchtemplate.Provider, instanceTypeProvider instancetype.Provider) []controller.Controller {
Expand All @@ -63,6 +66,7 @@ func NewControllers(ctx context.Context, sess *session.Session, clk clock.Clock,
nodeclaimtagging.NewController(kubeClient, instanceProvider),
controllerspricing.NewController(pricingProvider),
controllersinstancetype.NewController(instanceTypeProvider),
status.NewController[*v1beta1.EC2NodeClass](kubeClient, mgr.GetEventRecorderFor("karpenter")),
}
if options.FromContext(ctx).InterruptionQueue != "" {
sqsapi := servicesqs.New(sess)
Expand Down
8 changes: 8 additions & 0 deletions website/content/en/preview/reference/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,14 @@ Current depth of workqueue
### `workqueue_adds_total`
Total number of adds handled by workqueue

## Status Condition Metrics

### `operator_status_condition_transition_seconds`
The amount of time a condition was in a given state before transitioning. e.g. Alarm := P99(Updated=False) > 5 minutes

### `operator_status_condition_count`
The number of conditions for a given object, type and status. e.g. Alarm := Available=False > 0

## Client Go Metrics

### `client_go_request_total`
Expand Down

0 comments on commit 092e0ae

Please sign in to comment.