Skip to content
This repository has been archived by the owner on Mar 6, 2023. It is now read-only.

Commit

Permalink
Added simple main alert rules
Browse files Browse the repository at this point in the history
  • Loading branch information
rdemachkovych committed Nov 22, 2017
1 parent da0a2de commit 3fba4ba
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 10 deletions.
10 changes: 0 additions & 10 deletions files/rules/alert.rules
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,6 @@ groups:
annotations:
description: '{{$labels.host}} :disk was going to fill up in 4 hours'
summary: Instance {{$labels.host}}:disk was going to fill up
# Fires when the target with instance="localhost:9092" has reported up == 0
# continuously for 5 minutes.
- alert: isaacloudPrometheusDown
  expr: up{instance="localhost:9092"} == 0
  for: 5m
  labels:
    severity: critical
  annotations:
    # Folded scalar: the lines below are joined with single spaces at load
    # time. The space that was missing between the two sentences
    # ("instance.Wrong") is restored.
    description: >-
      This is critical alert, please check corresponding Prometheus instance.
      Wrong or missing data can result in wrong representation and/or
      interpretation of collected metrics.
    summary: isaacloud Prometheus server is down!
- alert: CriticalNodeLoad
expr: node_load15 > (count(node_cpu{mode="idle"}) WITHOUT (cpu, mode)) * 2
for: 2m
Expand Down
45 changes: 45 additions & 0 deletions files/rules/main_alert.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Top-level list of rule groups; the alert rules that follow belong to the
# single group declared here.
groups:
# NOTE(review): the group name is the path of alert.rules, although this file
# is files/rules/main_alert.rules — confirm the name is intentional.
- name: /etc/prometheus/rules/alert.rules
rules:
# Fires for every series whose `up` value has been 0 continuously for
# 5 minutes.
- alert: InstanceDown
  expr: up == 0
  for: 5m
  labels:
    severity: critical
  annotations:
    summary: 'Instance {{ $labels.instance }} down'
    # Folded scalar: loads as one line, identical to the single-quoted form.
    description: >-
      {{ $labels.instance }} of job {{ $labels.job }} has been down
      for more than 5 minutes.
# Fires when the average non-idle CPU percentage of an instance stays above
# 96% for 2 minutes.
- alert: CriticalCPULoad
  # Group by job as well as instance: an aggregation keeps only the labels
  # listed in BY, so with BY (instance) alone the {{ $labels.job }} reference
  # in the description rendered as an empty string. The selector already pins
  # job="node", so the grouping (and therefore the alert set) is unchanged.
  # NOTE(review): Prometheus documentation recommends rate() over irate() in
  # alerting rules — consider switching.
  expr: (100 * (1 - avg(irate(node_cpu{job="node",mode="idle"}[5m])) BY (instance, job))) > 96
  for: 2m
  labels:
    severity: critical
  annotations:
    # Duration text now matches the `for: 2m` clause (it previously said 1).
    description: '{{ $labels.instance }} of job {{ $labels.job }} has Critical High CPU load for more than 2 minutes.'
    summary: Instance {{ $labels.instance }} High CPU load
# Fires when the average non-idle CPU percentage of an instance stays above
# 90% for 2 minutes.
- alert: WarningCPULoad
  # Group by job as well as instance: an aggregation keeps only the labels
  # listed in BY, so with BY (instance) alone the {{ $labels.job }} reference
  # in the description rendered as an empty string. The selector already pins
  # job="node", so the grouping (and therefore the alert set) is unchanged.
  # NOTE(review): Prometheus documentation recommends rate() over irate() in
  # alerting rules — consider switching.
  expr: (100 * (1 - avg(irate(node_cpu{job="node",mode="idle"}[5m])) BY (instance, job))) > 90
  for: 2m
  labels:
    severity: warning
  annotations:
    # Duration text now matches the `for: 2m` clause (it previously said 1).
    description: '{{ $labels.instance }} of job {{ $labels.job }} has Warning High CPU load for more than 2 minutes.'
    summary: Instance {{ $labels.instance }} High CPU load
# Fires when an instance's used memory (total minus free, buffers and cached)
# stays above 95% of its total for 5 minutes.
- alert: CriticalMemoryLoad
  # Every sum() is grouped BY (instance). An ungrouped sum() collapses all
  # hosts into one label-less series, so the rule measured fleet-wide memory
  # and {{ $labels.instance }} in the annotations rendered as an empty string.
  expr: (sum(node_memory_MemTotal) BY (instance) - sum(node_memory_MemFree + node_memory_Buffers + node_memory_Cached) BY (instance)) / sum(node_memory_MemTotal) BY (instance) * 100 > 95
  for: 5m
  labels:
    severity: critical
  annotations:
    # Wording fixed: duplicated "has has" and missing "for".
    description: '{{ $labels.instance }} has had Critical Memory Load for more than 5 minutes.'
    summary: Instance {{ $labels.instance }} has Critical Memory Load
# Fires when an instance's used memory (total minus free, buffers and cached)
# stays above 85% of its total for 5 minutes.
- alert: WarningMemoryLoad
  # Every sum() is grouped BY (instance). An ungrouped sum() collapses all
  # hosts into one label-less series, so the rule measured fleet-wide memory
  # and {{ $labels.instance }} in the annotations rendered as an empty string.
  expr: (sum(node_memory_MemTotal) BY (instance) - sum(node_memory_MemFree + node_memory_Buffers + node_memory_Cached) BY (instance)) / sum(node_memory_MemTotal) BY (instance) * 100 > 85
  for: 5m
  labels:
    severity: warning
  annotations:
    # Wording fixed: duplicated "has has" and missing "for".
    description: '{{ $labels.instance }} has had Warning Memory Load for more than 5 minutes.'
    summary: Instance {{ $labels.instance }} has Warning Memory Load

0 comments on commit 3fba4ba

Please sign in to comment.