Skip to content

Commit

Permalink
Merge pull request #29 from gravitational/fred/add-helm-chart-2
Browse files Browse the repository at this point in the history
Added Helm chart
  • Loading branch information
fheinecke authored May 30, 2024
2 parents af7d293 + 6d242d3 commit aa6d196
Show file tree
Hide file tree
Showing 12 changed files with 545 additions and 6 deletions.
12 changes: 6 additions & 6 deletions grafana-dashboards/dynamic-limits-dashboard.json
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "sort_desc((max by(quota, account, service_code) ({__name__=~\"awsquota_.*$\", __name__!~\"awsquota_.*_limit$\", exported_instance=\"\", scope=\"ACCOUNT\"}) / max by(quota, account, service_code) ({__name__=~\"awsquota_.*_limit$\", exported_instance=\"\", scope=\"ACCOUNT\"})) >= $MinQuotaThreashold/100)",
"expr": "sort_desc((max by(quota, account, service_code) ({__name__=~\"awsquota_.*$\", __name__!~\"awsquota_.*_limit$\", aws_resource=\"\", scope=\"ACCOUNT\"}) / max by(quota, account, service_code) ({__name__=~\"awsquota_.*_limit$\", aws_resource=\"\", scope=\"ACCOUNT\"})) >= $MinQuotaThreashold/100)",
"format": "time_series",
"fullMetaSearch": false,
"hide": false,
Expand Down Expand Up @@ -209,7 +209,7 @@
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "sort_desc((max by(quota, region, service_code) ({__name__=~\"awsquota_.*$\", __name__!~\"awsquota_.*_limit$\", region=~\"$region\", exported_instance=\"\", scope=\"REGION\"}) / max by(quota, region, service_code) ({__name__=~\"awsquota_.*_limit$\", region=~\"$region\", exported_instance=\"\", scope=\"REGION\"})) >= $MinQuotaThreashold/100)",
"expr": "sort_desc((max by(quota, region, service_code) ({__name__=~\"awsquota_.*$\", __name__!~\"awsquota_.*_limit$\", region=~\"$region\", aws_resource=\"\", scope=\"REGION\"}) / max by(quota, region, service_code) ({__name__=~\"awsquota_.*_limit$\", region=~\"$region\", aws_resource=\"\", scope=\"REGION\"})) >= $MinQuotaThreashold/100)",
"format": "time_series",
"fullMetaSearch": false,
"hide": false,
Expand All @@ -234,7 +234,7 @@
"color": {
"mode": "thresholds"
},
"displayName": "[${__field.labels.exported_instance}] ${__field.labels.service_code} | ${__field.labels.quota}",
"displayName": "[${__field.labels.aws_resource}] ${__field.labels.service_code} | ${__field.labels.quota}",
"fieldMinMax": false,
"mappings": [],
"max": 1,
Expand Down Expand Up @@ -292,7 +292,7 @@
"disableTextWrap": false,
"editorMode": "code",
"exemplar": false,
"expr": "sort_desc(max by(quota, exported_instance, service_code) ({__name__=~\"awsquota_.*$\", __name__!~\"awsquota_.*_limit$\", scope=\"INSTANCE\", region=~\"$region\"}) / max by(quota, exported_instance, service_code) ({__name__=~\"awsquota_.*_limit$\", scope=\"INSTANCE\", region=~\"$region\"}) >= $MinQuotaThreashold/100)",
"expr": "sort_desc(max by(quota, aws_resource, service_code) ({__name__=~\"awsquota_.*$\", __name__!~\"awsquota_.*_limit$\", scope=\"INSTANCE\", region=~\"$region\"}) / max by(quota, aws_resource, service_code) ({__name__=~\"awsquota_.*_limit$\", scope=\"INSTANCE\", region=~\"$region\"}) >= $MinQuotaThreashold/100)",
"format": "time_series",
"fullMetaSearch": false,
"hide": false,
Expand Down Expand Up @@ -368,7 +368,7 @@
"$__all"
]
},
"definition": "label_values(awsquota_check_count,region)",
"definition": "label_values(awsquota_info,region)",
"hide": 0,
"includeAll": true,
"label": "region",
Expand All @@ -377,7 +377,7 @@
"options": [],
"query": {
"qryType": 1,
"query": "label_values(awsquota_check_count,region)",
"query": "label_values(awsquota_info,region)",
"refId": "PrometheusVariableQueryEditor-VariableQuery"
},
"refresh": 1,
Expand Down
22 changes: 22 additions & 0 deletions helm/Chart.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
apiVersion: v2
name: aws-quota-checker
description: AWS quota checker for Prometheus
# Chart type: either 'application' or 'library'.
#
# An application chart is a collection of templates that can be packaged into a versioned
# archive and deployed.
#
# A library chart provides utilities or functions for chart developers. It is included as a
# dependency of application charts to inject those utilities into the rendering pipeline; it
# defines no templates of its own and therefore cannot be deployed.
type: application
# Chart version. Increment it on every change to the chart or its templates, including changes
# to the app version. Versions are expected to follow Semantic Versioning (https://semver.org/).
# NOTE(review): 0.0.0-dev looks like a development placeholder - confirm where release builds
# set the real version.
version: 0.0.0-dev
# Version of the application being deployed. Increment it on every change to the application.
# It does not have to follow Semantic Versioning; it should reflect the version the application
# itself uses. Keep the value quoted so YAML always reads it as a string.
appVersion: "0.0.0-dev"
1 change: 1 addition & 0 deletions helm/grafana-dashboards
99 changes: 99 additions & 0 deletions helm/templates/_helpers.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
{{/*
Short name of the chart: .Values.nameOverride when set, otherwise the chart's
own name. The result is cut to 63 characters and a trailing "-" is removed.
*/}}
{{- define "aws-quota-checker.name" -}}
{{- $.Values.nameOverride | default $.Chart.Name | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Fully qualified app name used to name the chart's resources.

Resolution order:
  1. .Values.fullnameOverride, when set.
  2. The release name alone, when it already contains the chart name
     (or .Values.nameOverride).
  3. "<release name>-<chart name>" otherwise.

Whichever value is chosen is cut to 63 characters, because some Kubernetes
name fields are limited to this by the DNS naming spec, and a trailing "-"
left by the cut is removed.
*/}}
{{- define "aws-quota-checker.fullname" -}}
{{- with $.Values.fullnameOverride }}
{{- . | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $chartName := $.Values.nameOverride | default $.Chart.Name }}
{{- $candidate := printf "%s-%s" $.Release.Name $chartName }}
{{- if contains $chartName $.Release.Name }}
{{- $candidate = $.Release.Name }}
{{- end }}
{{- $candidate | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}

{{/*
Value for the chart label: "<chart name>-<chart version>" with every "+"
replaced by "_", cut to 63 characters and stripped of a trailing "-".
*/}}
{{- define "aws-quota-checker.chart" -}}
{{- $nameAndVersion := printf "%s-%s" $.Chart.Name $.Chart.Version }}
{{- $nameAndVersion | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Labels shared by the resources of this chart: the chart label, the selector
labels, the app version (only when the chart declares one) and the managing
service. The output is a flat list of "key: value" lines with no leading
indentation.
*/}}
{{- define "aws-quota-checker.commonLabels" -}}
helm.sh/chart: {{ include "aws-quota-checker.chart" $ }}
{{ include "aws-quota-checker.selectorCommonLabels" $ }}
{{- with $.Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
app.kubernetes.io/managed-by: {{ $.Release.Service }}
{{- end }}

{{/*
Selector labels: the chart name and the release instance.

Both values are quoted so that a release name or name override that looks like
a number or a boolean (for example "123" or "true") is still rendered as a YAML
string; label values must be strings.
*/}}
{{- define "aws-quota-checker.selectorCommonLabels" -}}
app.kubernetes.io/name: {{ include "aws-quota-checker.name" . | quote }}
app.kubernetes.io/instance: {{ $.Release.Name | quote }}
{{- end }}

{{/*
Dashboards to deploy, rendered as a YAML mapping of file base name to file
content. Every .json file packaged under grafana-dashboards/ is included
unless its base name is listed in
.Values.visualization.dashboards.ignoredDashboards.
*/}}
{{- define "aws-quota-checker.enabledDashboardsYaml" -}}
{{- $dashboards := dict }}
{{- range $path, $bytes := $.Files.Glob "grafana-dashboards/*.json" }}
{{- $key := base $path }}
{{- if not (has $key $.Values.visualization.dashboards.ignoredDashboards) }}
{{- $_ := set $dashboards $key (toString $bytes) }}
{{- end }}
{{- end }}
{{- toYaml $dashboards }}
{{- end }}

{{/*
Rule template for request duration.

Renders one Prometheus alerting rule (a "- alert:" list entry) that fires when
the time-averaged value of the matching awsquota duration series exceeds a
threshold. Renders no rule when AlertValues.enabled is falsy.

Expects a dict with these keys:
  AlertValues - per-rule settings; the fields read here are enabled, duration
                and thresholdSeconds.
  QuerySuffix - metric-name suffix; series are matched by the regex
                "awsquota_.*_<QuerySuffix>".
  VerbPhrase  - subject used in the description and summary annotations.
  AlertName   - appended directly (no space) to "AWSQuota" to form the alert
                name.
  Context     - root chart context; rebound to "$" so that Values, Release and
                the fullname helper are reachable inside this template.

The selector is restricted to jobs matching "<fullname>-.*" in the release
namespace. duration is used both as the subquery range and as the "for:" hold
time. When alerting.prometheusRules.additionalLabels is set, it is rendered as
the rule's labels.
*/}}
{{- define "aws-quota-checker.requestDurationRule" -}}
{{- $alertValues := .AlertValues -}}
{{- $querySuffix := .QuerySuffix -}}
{{- $verbPhrase := .VerbPhrase -}}
{{- $alertName := .AlertName -}}
{{ $ := .Context }}
{{- if $alertValues.enabled }}
- alert: AWSQuota {{- $alertName }}
expr: >-
avg_over_time(
avg by (quota, account, region) (
{
__name__=~"awsquota_.*_{{ $querySuffix }}",
job=~"{{ printf "%s-.*" (include "aws-quota-checker.fullname" $) }}",
namespace="{{ $.Release.Namespace }}"
}
)[{{ $alertValues.duration }}:]
) > {{ $alertValues.thresholdSeconds }}
for: {{ $alertValues.duration }}
annotations:
description: >-
{{ $verbPhrase }} has averaged more than {{ $alertValues.thresholdSeconds }} seconds
over the past {{ $alertValues.duration }}.
summary: {{ $verbPhrase }} is taking too long.
{{- if $.Values.alerting.prometheusRules.additionalLabels }}
labels:
{{- $.Values.alerting.prometheusRules.additionalLabels | toYaml | trim | nindent 12 }}
{{- end }}
{{- end }}
{{- end }}
60 changes: 60 additions & 0 deletions helm/templates/alert-rule/longRunningRule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{{- /*
PrometheusRule holding the request-duration alerts.

The resource is rendered only when at least one entry under
alerting.prometheusRules.requestDuration has "enabled" set. Each alert is
produced by the "aws-quota-checker.requestDurationRule" helper, which itself
skips entries that are not enabled.

NOTE(review): the resource's metadata labels are taken from
visualization.dashboards.additionalLabels - confirm this is intended for a
PrometheusRule and is not a copy of the dashboards template.
*/ -}}
{{- $durationSettings := .Values.alerting.prometheusRules.requestDuration -}}
{{- $anyRuleEnabled := false -}}
{{- range $settings := values $durationSettings -}}
{{- $anyRuleEnabled = or $anyRuleEnabled $settings.enabled -}}
{{- end -}}
{{- if $anyRuleEnabled -}}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "aws-quota-checker.fullname" . }}.request-rules
  labels:
    {{- include "aws-quota-checker.commonLabels" . | nindent 4 }}
    {{- with .Values.visualization.dashboards.additionalLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  groups:
    - name: aws_quota_checker.rules.queries
      rules:
        {{- /* One spec per alert, in the order the alerts are emitted. */}}
        {{- $ruleSpecs := list
          (dict
            "AlertValues" $durationSettings.longRunningLimitQuery
            "QuerySuffix" "limit_duration_seconds"
            "VerbPhrase" "Querying for one or more quota limits"
            "AlertName" "LongRunningQuotaLimitQuery"
          )
          (dict
            "AlertValues" $durationSettings.longRunningCountQuery
            "QuerySuffix" "count_duration_seconds"
            "VerbPhrase" "Querying for one or more quota counts"
            "AlertName" "LongRunningQuotaCountQuery"
          )
          (dict
            "AlertValues" $durationSettings.longRunningLimitQueryTotal
            "QuerySuffix" "checks_duration_seconds"
            "VerbPhrase" "Querying for all quota limits"
            "AlertName" "LongRunningQuotaLimitQueryTotal"
          )
          (dict
            "AlertValues" $durationSettings.longRunningCountQueryTotal
            "QuerySuffix" "currents_duration_seconds"
            "VerbPhrase" "Querying for all quota counts"
            "AlertName" "LongRunningQuotaCountQueryTotal"
          )
        }}
        {{- range $ruleSpec := $ruleSpecs }}
        {{- include "aws-quota-checker.requestDurationRule" (set $ruleSpec "Context" $) }}
        {{- end }}
{{- end }}
57 changes: 57 additions & 0 deletions helm/templates/alert-rule/quotaRule.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{{- /*
PrometheusRule with one usage-ratio alert per entry of
alerting.prometheusRules.quotas. Nothing is rendered when that list is empty
or unset.

Each entry is read for:
  quotaName - metric name fragment, matched as the regex "awsquota_<quotaName>"
              for the current count and "awsquota_<quotaName>_limit" for the
              limit. The special value "all" matches every awsquota series
              except check_count, info, duration and limit series.
  alertName - appended directly (no space) to "AWSQuota" to form the alert
              name.
  threshold - ratio of count to limit above which the alert fires; shown in
              the description multiplied by 100 as a percentage.
  duration  - optional "for:" hold time.

Count and limit are each summed by account, region, aws_resource, quota and
scope, so those are the only labels available to the annotations. The
description therefore reads the resource from the aws_resource label.

NOTE(review): the resource's metadata labels are taken from
visualization.dashboards.additionalLabels - confirm this is intended for a
PrometheusRule and is not a copy of the dashboards template.
*/ -}}
{{- if .Values.alerting.prometheusRules.quotas }}
---
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ include "aws-quota-checker.fullname" . }}.quota-rules
  labels:
    {{- include "aws-quota-checker.commonLabels" . | nindent 4 }}
    {{- with .Values.visualization.dashboards.additionalLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
spec:
  groups:
    - name: aws_quota_checker.rules.quotas
      rules:
        {{- range .Values.alerting.prometheusRules.quotas }}
        {{- $quotaName := .quotaName }}
        {{- $alertName := .alertName }}
        {{- $countQuery := printf "__name__=~\"awsquota_%s\"" $quotaName }}
        {{- $limitQuery := printf "__name__=~\"awsquota_%s_limit\"" $quotaName }}
        {{- if eq $quotaName "all" }}
        {{- $countQuery = "__name__=~\"awsquota_.*\",\n__name__!~\"awsquota_(check_count|info|.*_duration_seconds|.*_limit)\"" }}
        {{- $limitQuery = "__name__=~\"awsquota_.*_limit\"" }}
        {{- end }}
        - alert: AWSQuota {{- $alertName }}
          expr: >-
            sum by (account, region, aws_resource, quota, scope) (
                {
                  {{- $countQuery | nindent 18 }},
                  job=~"{{ printf "%s-.*" (include "aws-quota-checker.fullname" $) }}",
                  namespace="{{ $.Release.Namespace }}"
                }
            )
            /
            sum by (account, region, aws_resource, quota, scope) (
                {
                  {{- $limitQuery | nindent 18 }},
                  job=~"{{ printf "%s-.*" (include "aws-quota-checker.fullname" $) }}",
                  namespace="{{ $.Release.Namespace }}"
                }
            )
            > {{ .threshold }}
          {{- if .duration }}
          for: {{ .duration }}
          {{- end }}
          annotations:
            description: >-
              Quota threshold of {{ mulf .threshold 100 }}% for {{ "{{" }} $labels.quota {{ "}}" }}
              {{ "{{" }} if $labels.aws_resource {{ "}}" }} on resource {{ "{{" }} $labels.aws_resource {{ "}}" }}{{ "{{" }} end {{ "}}" }}
              in {{ "{{" }} $labels.account {{ "}}" }}/{{ "{{" }}$labels.region{{ "}}" }} has been reached.
            summary: Reached quota threshold for {{ "{{" }} $labels.quota {{ "}}" }}
          {{- if $.Values.alerting.prometheusRules.additionalLabels }}
          labels:
            {{- $.Values.alerting.prometheusRules.additionalLabels | toYaml | trim | nindent 12 }}
          {{- end }}
        {{- end }}
{{- end }}
14 changes: 14 additions & 0 deletions helm/templates/dashboards.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
{{- /*
ConfigMap carrying the Grafana dashboards selected by the
"aws-quota-checker.enabledDashboardsYaml" helper, one data key per dashboard
file. Rendered only when visualization.dashboards.enabled is set; the labels
are the chart's common labels plus visualization.dashboards.additionalLabels.
*/ -}}
{{- $dashboardValues := .Values.visualization.dashboards }}
{{- if $dashboardValues.enabled }}
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "aws-quota-checker.fullname" . }}-grafana-dashboards
  labels:
    {{- include "aws-quota-checker.commonLabels" . | nindent 4 }}
    {{- with $dashboardValues.additionalLabels }}
    {{- toYaml . | nindent 4 }}
    {{- end }}
data:
  {{- include "aws-quota-checker.enabledDashboardsYaml" . | nindent 2 }}
{{- end }}
Loading

0 comments on commit aa6d196

Please sign in to comment.