diff --git a/pica-metrologie/docker-compose.yml b/pica-metrologie/docker-compose.yml index 5eff7f1ab9c40037989f63d9b9e80166063ca310..022a8bc80c8bed936158155cac721d5404ba4152 100644 --- a/pica-metrologie/docker-compose.yml +++ b/pica-metrologie/docker-compose.yml @@ -78,12 +78,12 @@ services: # Where to read metrics - "-datasource.url=http://victoria-metrics:8428" # Where to write and read alert states, to keep - # state during restart, as vmagent stores states in memory + # state during restart, as vmalert stores states in memory - "-remoteWrite.url=http://victoria-metrics:8428" - "-remoteRead.url=http://victoria-metrics:8428" # Where to send alert when they must be triggered - "-notifier.url=http://alertmanager:9093" - # HTTP server for vmagent's own metrics + # HTTP server for vmalert's own metrics - "-httpListenAddr=:8880" # By default, evaluate rules every 1 minute - "-evaluationInterval=1m" diff --git a/pica-metrologie/vmalert-rules.yml b/pica-metrologie/vmalert-rules.yml index e9b10e07903169d14c0beb1edb8da714bf8c1a55..1396e1ce86ca084d7ab5baaf580393f3f0ac96e4 100644 --- a/pica-metrologie/vmalert-rules.yml +++ b/pica-metrologie/vmalert-rules.yml @@ -68,7 +68,10 @@ groups: summary: Proxmox HDD volume 90% full description: Proxmox HDD volume ({{ $labels.storage }}) on {{ $labels.instance }} is {{ $value }}% full - alert: DiskDamaged - expr: smartmon_device_smart_healthy != 1 + # Only get values from real disks, so ignore VMs + # This is hardcoded, but I cannot see another way to do so because VMs do not have a specific prefix + # We must add new machines here + expr: smartmon_device_smart_healthy{node=~"alice|bob"} != 1 labels: severity: critical annotations: