diff --git a/.gitignore b/.gitignore index 9e30eb9b..c9a0a77c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -*.tgz \ No newline at end of file +*.tgz +Chart.lock \ No newline at end of file diff --git a/charts/pga/Chart.yaml b/charts/pga/Chart.yaml new file mode 100644 index 00000000..353086cc --- /dev/null +++ b/charts/pga/Chart.yaml @@ -0,0 +1,56 @@ +apiVersion: v2 +name: pga +description: A Helm chart for prometheus, grafana and alertmanager +type: application +version: 1.0.0 +appVersion: 1.0.0 +maintainers: + - name: ashwani-opstree + +dependencies: + - name: kube-prometheus-stack + version: 61.3.1 + repository: https://prometheus-community.github.io/helm-charts/ + alias: app + tags: + - monitoring + condition: app.enabled + + - name: kube-prometheus-stack + version: 61.3.1 + repository: https://prometheus-community.github.io/helm-charts/ + alias: kube + tags: + - monitoring + + - name: prometheus-adapter + version: 4.10.0 + repository: https://prometheus-community.github.io/helm-charts/ + tags: + - monitoring + alias: adapter + condition: adapter.enabled + + - name: prometheus-pushgateway + version: 2.14.0 + repository: https://prometheus-community.github.io/helm-charts/ + tags: + - monitoring + alias: pushgateway + condition: pushgateway.enabled + + - name: prometheus-blackbox-exporter + version: 8.17.0 + repository: https://prometheus-community.github.io/helm-charts/ + tags: + - blackbox + alias: blackbox + condition: blackbox.enabled + + - name: thanos + version: 15.7.12 + repository: https://charts.bitnami.com/bitnami + tags: + - thanos + alias: thanos + condition: thanos.enabled diff --git a/charts/pga/README.md b/charts/pga/README.md new file mode 100644 index 00000000..5daae401 --- /dev/null +++ b/charts/pga/README.md @@ -0,0 +1,36 @@ +# Prometheus Monitoring Setup with Helm + +This document provides detailed instructions for setting up Prometheus monitoring in a Kubernetes cluster using Helm charts. 
Follow these commands to deploy Prometheus and its associated components. + +## 1. Apply Custom Resource Definitions (CRDs) + +Run the following commands to apply each CRD: + +```bash +kubectl apply --server-side=true -f https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-alertmanagers.yaml +kubectl apply --server-side=true -f https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-alertmanagerconfigs.yaml +kubectl apply --server-side=true -f https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-podmonitors.yaml +kubectl apply --server-side=true -f https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-probes.yaml +kubectl apply --server-side=true -f https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-prometheusagents.yaml +kubectl apply --server-side=true -f https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-prometheuses.yaml +kubectl apply --server-side=true -f https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-prometheusrules.yaml +kubectl apply --server-side=true -f https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-scrapeconfigs.yaml +kubectl apply --server-side=true -f
https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-servicemonitors.yaml +kubectl apply --server-side=true -f https://raw.githubusercontent.com/prometheus-community/helm-charts/kube-prometheus-stack-61.3.1/charts/kube-prometheus-stack/charts/crds/crds/crd-thanosrulers.yaml +``` +## 2. Update Helm Chart Dependencies +```bash +helm dep update +``` +Updates Helm chart dependencies. + +## 3. Create a Namespace for Monitoring +```bash +kubectl create ns monitoring +``` +Creates a Kubernetes namespace named monitoring. + +## 4. Render chart templates locally and apply +```bash +helm template --name-template=monitoring . -n monitoring -f values.yaml | kubectl apply -f - +``` \ No newline at end of file diff --git a/charts/pga/examples/thanos/pga.yaml b/charts/pga/examples/thanos/pga.yaml new file mode 100644 index 00000000..f26a477b --- /dev/null +++ b/charts/pga/examples/thanos/pga.yaml @@ -0,0 +1,19 @@ +app: + enabled: false + +kube: + enabled: true + grafana: + enabled: true + sidecar: + datasources: + defaultDatasourceEnabled: false + +pushgateway: + enabled: false + +blackbox: + enabled: false + +adapter: + enabled: true \ No newline at end of file diff --git a/charts/pga/examples/thanos/values.yaml b/charts/pga/examples/thanos/values.yaml new file mode 100644 index 00000000..75b06187 --- /dev/null +++ b/charts/pga/examples/thanos/values.yaml @@ -0,0 +1,257 @@ +app: + enabled: false + +kube: + enabled: true + fullnameOverride: kube + commonLabels: + prometheus: kube + defaultRules: + create: false + alertmanager: + enabled: true + alertmanagerSpec: + retention: 240h + resources: + requests: + cpu: 250m + memory: 500Mi + limits: + cpu: 250m + memory: 500Mi + storage: + volumeClaimTemplate: + spec: + # storageClassName: encrypted-gp3 + accessModes: [ "ReadWriteOnce" ] + resources: + requests: + storage: 1Gi + grafana: + enabled: true + sidecar: + datasources: +
defaultDatasourceEnabled: false + kubeApiServer: + enabled: true + kubelet: + enabled: true + namespace: kube-system + kubeControllerManager: + enabled: false + coreDns: + enabled: true + kubeEtcd: + enabled: false + kubeScheduler: + enabled: false + kubeProxy: + enabled: false + kubeStateMetrics: + enabled: true + kube-state-metrics: + customLabels: + prometheus: kube + enabled: true + podSecurityPolicy: + enabled: false + resources: + requests: + cpu: 250m + memory: 500Mi + limits: + cpu: 250m + memory: 500Mi + nodeExporter: + enabled: true + prometheus-node-exporter: + prometheus: + monitor: + additionalLabels: + prometheus: kube + # rbac: + # pspEnabled: false + # image: + # repository: + # tag: latest + # pullPolicy: Always + prometheusOperator: + enabled: true + admissionWebhooks: + enabled: false + deployment: + enabled: true + tls: + enabled: false + prometheus: + enabled: true + thanosService: + enabled: true + thanosServiceMonitor: + enabled: true + prometheusSpec: + externalLabels: + kubernetes_cluster: opstree + prometheus_cluster: kube + # get more details https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.ThanosSpec + thanos: + version: 0.35.1 + # image: quay.io/thanos/thanos:v0.35.1 + blockSize: 5m + objectStorageConfig: + existingSecret: + key: objstore.yml + name: monitoring-thanos-objstore-secret + # nodeSelector: + # appType: monitoring + # tolerations: + # - key: "appType" + # operator: "Equal" + # value: "monitoring" + # effect: "NoSchedule" +# remoteWrite: +# - url: https://app.last9.io/jupiter/prometheus/write +# basicAuth: +# username: +# name: promsecret +# key: username +# password: +# name: promsecret +# key: password +## # Do not add the writeRelabelConfigs section if you want to +## # send all metrics via remote write +## writeRelabelConfigs: +# - sourceLabels: [ __name__ ] +# regex: 'istio*' +# action: keep + # image: + # tag: v2.41.0 + retention: 1h + replicas: 2 + #
externalUrl: "http://kube-opstree.prod.internal/" + resources: + requests: + cpu: "500m" + memory: 500Mi + limits: + cpu: "500m" + memory: 500Mi + storageSpec: + volumeClaimTemplate: + spec: + # storageClassName: encrypted-gp3 + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + serviceMonitorSelector: + matchExpressions: + - key: prometheus + operator: In + values: + - kube + podMonitorSelector: + matchExpressions: + - key: prometheus + operator: In + values: + - kube + ruleSelector: + matchLabels: + prometheus: kube + service: + name: kube-prometheus + + +pushgateway: + enabled: false + serviceMonitor: + enabled: true + namespace: monitoring + additionalLabels: + prometheus: app + extraArgs: + - --log.level=debug + - --push.disable-consistency-check + resources: + limits: + cpu: 1 + memory: 4096Mi + requests: + cpu: 500m + memory: 4096Mi + +blackbox: + enabled: false + serviceMonitor: + enabled: true + defaults: + additionalMetricsRelabels: {} + labels: + prometheus: app + interval: 30s + scrapeTimeout: 30s + module: http_2xx + config: + modules: + http_2xx: + prober: http + timeout: 5s + http: + valid_http_versions: [ "HTTP/1.0", "HTTP/1.1", "HTTP/2.0" ] + no_follow_redirects: false + preferred_ip_protocol: "ip4" + fail_if_ssl: false + fail_if_not_ssl: false + +adapter: + enabled: false + +thanos: + enabled: true + objstoreConfig: |- + type: s3 + config: + bucket: thanos + endpoint: monitoring-minio.monitoring.svc.cluster.local:9000 + access_key: minio + secret_key: minio123 + insecure: true + query: + dnsDiscovery: + sidecarsService: kube-thanos-discovery + sidecarsNamespace: monitoring + bucketweb: + enabled: true + compactor: + enabled: false + storegateway: + enabled: true + ruler: + enabled: true + serviceMonitor: + namespace: monitoring + alertmanagers: + - http://kube-alertmanager.monitoring.svc.cluster.local:9093 + config: |- + groups: + - name: "metamonitoring" + rules: + - alert: "PrometheusDown" + expr: 
absent(up{prometheus="monitoring/kube-prometheus"}) + metrics: + enabled: true + serviceMonitor: + namespace: monitoring + enabled: true + minio: + enabled: true + auth: + rootPassword: minio123 + rootUser: minio + monitoringBuckets: thanos + accessKey: + password: minio + secretKey: + password: minio123 \ No newline at end of file diff --git a/charts/pga/templates/datasources/alertmanager.yaml b/charts/pga/templates/datasources/alertmanager.yaml new file mode 100644 index 00000000..1a67481e --- /dev/null +++ b/charts/pga/templates/datasources/alertmanager.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: kube-alertmanager-datasource + namespace: monitoring + labels: + grafana_datasource: "1" + app: kube-grafana + prometheus: kube +data: + kube-alertmanager.yaml: |- + apiVersion: 1 + datasources: + - name: "kube-alertmanager" + type: alertmanager + uid: alertmanager + url: http://kube-alertmanager.monitoring:9093/ + access: proxy + jsonData: + handleGrafanaManagedAlerts: false + implementation: prometheus diff --git a/charts/pga/templates/datasources/prometheus.yaml b/charts/pga/templates/datasources/prometheus.yaml new file mode 100644 index 00000000..6265b32d --- /dev/null +++ b/charts/pga/templates/datasources/prometheus.yaml @@ -0,0 +1,24 @@ +{{- if not .Values.thanos.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: kube-prometheus-datasource + namespace: monitoring + labels: + grafana_datasource: "1" + app: kube-grafana + prometheus: kube +data: + kube-prometheus.yaml: |- + apiVersion: 1 + datasources: + - name: "kube" + type: prometheus + uid: prometheus + url: http://kube-prometheus.monitoring:9090/ + access: proxy + isDefault: true + jsonData: + httpMethod: POST + timeInterval: 30s +{{- end }} diff --git a/charts/pga/templates/datasources/tempo.yaml b/charts/pga/templates/datasources/tempo.yaml new file mode 100644 index 00000000..2505df07 --- /dev/null +++ b/charts/pga/templates/datasources/tempo.yaml @@ -0,0 +1,21 @@ 
+apiVersion: v1 +kind: ConfigMap +metadata: + name: tempo-datasource + namespace: monitoring + labels: + grafana_datasource: "1" + app: kube-grafana + prometheus: kube +data: + tempo.yaml: |- + apiVersion: 1 + datasources: + - name: "tempo" + type: tempo + uid: tempo + url: http://tempo.observability.svc.cluster.local:3100/ + access: proxy + jsonData: + handleGrafanaManagedAlerts: false + implementation: prometheus diff --git a/charts/pga/templates/datasources/thanos.yaml b/charts/pga/templates/datasources/thanos.yaml new file mode 100644 index 00000000..3f6df974 --- /dev/null +++ b/charts/pga/templates/datasources/thanos.yaml @@ -0,0 +1,23 @@ +{{- if .Values.thanos.enabled }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: kube-thanos-datasource + namespace: monitoring + labels: + grafana_datasource: "1" + app: kube-grafana + prometheus: kube +data: + kube-thanos.yaml: |- + apiVersion: 1 + datasources: + - name: "kube-thanos" + type: prometheus + uid: thanos + url: http://monitoring-thanos-query-frontend.monitoring:9090/ + access: proxy + jsonData: + httpMethod: POST + timeInterval: 30s +{{- end }} diff --git a/charts/pga/values.yaml b/charts/pga/values.yaml new file mode 100644 index 00000000..b436cc53 --- /dev/null +++ b/charts/pga/values.yaml @@ -0,0 +1,311 @@ +app: + enabled: false + fullnameOverride: app + commonLabels: + prometheus: app + defaultRules: + create: false + alertmanager: + enabled: false + grafana: + enabled: false + kubeApiServer: + enabled: false + kubelet: + enabled: false + kubeControllerManager: + enabled: false + coreDns: + enabled: false + kubeEtcd: + enabled: false + kubeScheduler: + enabled: false + kubeProxy: + enabled: false + kubeStateMetrics: + enabled: false + kube-state-metrics: + enabled: false + nodeExporter: + enabled: false + prometheusOperator: + enabled: false + admissionWebhooks: + enabled: false + configReloaderCpu: 300m + configReloaderMemory: 300Mi + prometheus: + enabled: true + prometheusSpec: + # nodeSelector: + # 
appType: monitoring + # tolerations: + # - key: "appType" + # operator: "Equal" + # value: "monitoring" + # effect: "NoSchedule" + retention: 30d + replicas: 1 + # externalUrl: "" + resources: + requests: + cpu: "1" + memory: 1Gi + limits: + cpu: "1" + memory: 1Gi + storageSpec: + volumeClaimTemplate: + spec: + # storageClassName: encrypted-gp3 + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + alertingEndpoints: + - name: kube-alertmanager + namespace: monitoring + port: web + pathPrefix: / + apiVersion: v2 + serviceMonitorSelector: + matchExpressions: + - key: prometheus + operator: In + values: + - app + podMonitorSelector: + matchExpressions: + - key: prometheus + operator: In + values: + - app + ruleSelector: + matchLabels: + prometheus: app + additionalScrapeConfigs: + - job_name: kubernetes-services-probe + metrics_path: /probe + params: + module: + - http_2xx + kubernetes_sd_configs: + - role: service + scrape_interval: 30s + scrape_timeout: 25s + relabel_configs: + - source_labels: + - __meta_kubernetes_service_annotation_prometheus_io_probe + regex: true + action: keep + - source_labels: + - __meta_kubernetes_service_name + target_label: service + - source_labels: + - __address__ + - __meta_kubernetes_service_annotation_prometheus_io_path + regex: (.+);(.+) + target_label: __param_target + replacement: ${1}${2} + - source_labels: [__param_target] + target_label: instance + - source_labels: [] + target_label: __address__ + replacement: monitoring-prometheus-blackbox-exporter:9115 + service: + name: app-prometheus + +kube: + fullnameOverride: kube + commonLabels: + prometheus: kube + defaultRules: + create: false + alertmanager: + enabled: true + alertmanagerSpec: + retention: 240h + resources: + requests: + cpu: 250m + memory: 500Mi + limits: + cpu: 250m + memory: 500Mi + storage: + volumeClaimTemplate: + spec: + # storageClassName: encrypted-gp3 + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + grafana: +
enabled: false + kubeApiServer: + enabled: true + kubelet: + enabled: true + namespace: kube-system + kubeControllerManager: + enabled: false + coreDns: + enabled: true + kubeEtcd: + enabled: false + kubeScheduler: + enabled: false + kubeProxy: + enabled: false + kubeStateMetrics: + enabled: true + kube-state-metrics: + customLabels: + prometheus: kube + enabled: true + podSecurityPolicy: + enabled: false + resources: + requests: + cpu: 250m + memory: 500Mi + limits: + cpu: 250m + memory: 500Mi + nodeExporter: + enabled: true + prometheus-node-exporter: + prometheus: + monitor: + additionalLabels: + prometheus: kube + # rbac: + # pspEnabled: false + # image: + # repository: + # tag: latest + # pullPolicy: Always + prometheusOperator: + enabled: true + admissionWebhooks: + enabled: false + deployment: + enabled: true + tls: + enabled: false + prometheus: + enabled: true + prometheusSpec: + # nodeSelector: + # appType: monitoring + # tolerations: + # - key: "appType" + # operator: "Equal" + # value: "monitoring" + # effect: "NoSchedule" +# remoteWrite: +# - url: https://app.last9.io/jupiter/prometheus/write +# basicAuth: +# username: +# name: promsecret +# key: username +# password: +# name: promsecret +# key: password +## # Do not add the writeRelabelConfigs section if you want to +## # send all metrics via remote write +## writeRelabelConfigs: +# - sourceLabels: [ __name__ ] +# regex: 'istio*' +# action: keep + # image: + # tag: v2.41.0 + retention: 30d + replicas: 1 + # externalUrl: "http://kube-opstree.prod.internal/" + resources: + requests: + cpu: "500m" + memory: 500Mi + limits: + cpu: "500m" + memory: 500Mi + storageSpec: + volumeClaimTemplate: + spec: + # storageClassName: encrypted-gp3 + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: 1Gi + serviceMonitorSelector: + matchExpressions: + - key: prometheus + operator: In + values: + - kube + podMonitorSelector: + matchExpressions: + - key: prometheus + operator: In + values: + - kube + 
ruleSelector: + matchLabels: + prometheus: kube + service: + name: kube-prometheus + +pushgateway: + enabled: false + serviceMonitor: + enabled: true + namespace: monitoring + additionalLabels: + prometheus: app + extraArgs: + - --log.level=debug + - --push.disable-consistency-check + resources: + limits: + cpu: 1 + memory: 4096Mi + requests: + cpu: 500m + memory: 4096Mi + +blackbox: + enabled: false + serviceMonitor: + enabled: true + defaults: + additionalMetricsRelabels: {} + labels: + prometheus: app + interval: 30s + scrapeTimeout: 30s + module: http_2xx + config: + modules: + http_2xx: + prober: http + timeout: 5s + http: + valid_http_versions: + - "HTTP/1.0" + - "HTTP/1.1" + - "HTTP/2.0" + no_follow_redirects: false + preferred_ip_protocol: "ip4" + fail_if_ssl: false + fail_if_not_ssl: false + +adapter: + enabled: false + +thanos: + enabled: false diff --git a/ct.yaml b/ct.yaml index 0a93f0cc..4c3b677e 100644 --- a/ct.yaml +++ b/ct.yaml @@ -7,3 +7,4 @@ chart-repos: - ot-helm=https://ot-container-kit.github.io/helm-charts excluded-charts: - mysql + - pga