From 016af2f507cba62c44d8e21c47a6b4e15fae3a3c Mon Sep 17 00:00:00 2001 From: Quentin Duchemin <quentinduchemin@tuta.io> Date: Sun, 29 Aug 2021 01:10:10 +0200 Subject: [PATCH] Alerts based on SMART values only for physical machines --- pica-metrologie/docker-compose.yml | 4 ++-- pica-metrologie/vmalert-rules.yml | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pica-metrologie/docker-compose.yml b/pica-metrologie/docker-compose.yml index 5eff7f1a..022a8bc8 100644 --- a/pica-metrologie/docker-compose.yml +++ b/pica-metrologie/docker-compose.yml @@ -78,12 +78,12 @@ services: # Where to read metrics - "-datasource.url=http://victoria-metrics:8428" # Where to write and read alert states, to keep - # state during restart, as vmagent stores states in memory + # state during restart, as vmalert stores states in memory - "-remoteWrite.url=http://victoria-metrics:8428" - "-remoteRead.url=http://victoria-metrics:8428" # Where to send alert when they must be triggered - "-notifier.url=http://alertmanager:9093" - # HTTP server for vmagent's own metrics + # HTTP server for vmalert's own metrics - "-httpListenAddr=:8880" # By default, evaluate rules every 1 minute - "-evaluationInterval=1m" diff --git a/pica-metrologie/vmalert-rules.yml b/pica-metrologie/vmalert-rules.yml index e9b10e07..1396e1ce 100644 --- a/pica-metrologie/vmalert-rules.yml +++ b/pica-metrologie/vmalert-rules.yml @@ -68,7 +68,10 @@ groups: summary: Proxmox HDD volume 90% full description: Proxmox HDD volume ({{ $labels.storage }}) on {{ $labels.instance }} is {{ $value }}% full - alert: DiskDamaged - expr: smartmon_device_smart_healthy != 1 + # Only get values from real disks so ignore VMs + # This is hardcoded but I cannot see any other way to do so because VMs do not have a specific prefix + # We must add new machines here + expr: smartmon_device_smart_healthy{node=~"alice|bob"} != 1 labels: severity: critical annotations: -- GitLab