From 72db3f67ba99531e058cf36a24e8f6fdd028bbb5 Mon Sep 17 00:00:00 2001 From: Quentin Duchemin <quentinduchemin@tuta.io> Date: Sat, 4 Sep 2021 09:43:29 +0200 Subject: [PATCH] [Alerting] Send network alert if errors/drop last at least 30m --- pica-metrologie/vmalert-rules.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pica-metrologie/vmalert-rules.yml b/pica-metrologie/vmalert-rules.yml index 659d6ff8..5e620330 100644 --- a/pica-metrologie/vmalert-rules.yml +++ b/pica-metrologie/vmalert-rules.yml @@ -134,39 +134,39 @@ groups: rules: - alert: ReceiveHighErrors expr: 100 * (rate(node_network_receive_errs_total[2m]) / rate(node_network_receive_packets_total[2m])) > 1 - for: "5m" + for: "30m" labels: severity: warning annotations: summary: Network interface is reporting many receive errors - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }}% receive errors for 5 minutes.' + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }}% receive errors for 30 minutes.' dashboard: https://grafana.picasoft.net/d/QPF5l5uZa/network?var-node={{ $labels.instance }} - alert: SendHighErrors expr: 100 * (rate(node_network_transmit_errs_total[2m]) / rate(node_network_transmit_packets_total[2m])) > 1 - for: "5m" + for: "30m" labels: severity: warning annotations: summary: Network interface is reporting many transmit errors - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }}% transmit errors for 5 minutes.' + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }}% transmit errors for 30 minutes.' dashboard: https://grafana.picasoft.net/d/QPF5l5uZa/network?var-node={{ $labels.instance }} - alert: ReceiveHighDrop expr: 100 * (rate(node_network_receive_drop_total[2m]) / rate(node_network_receive_packets_total[2m])) > 1 - for: "5m" + for: "30m" labels: severity: warning annotations: summary: Network interface is reporting many receive drops - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }}% receive drops for 5 minutes.' + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }}% receive drops for 30 minutes.' dashboard: https://grafana.picasoft.net/d/QPF5l5uZa/network?var-node={{ $labels.instance }} - alert: SendHighDrop expr: 100 * (rate(node_network_transmit_drop_total[2m]) / rate(node_network_transmit_packets_total[2m])) > 1 - for: "5m" + for: "30m" labels: severity: warning annotations: summary: Network interface is reporting many transmit drops - description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }}% transmit drops for 5 minutes.' + description: '{{ $labels.instance }} interface {{ $labels.device }} has encountered {{ printf "%.0f" $value }}% transmit drops for 30 minutes.' dashboard: https://grafana.picasoft.net/d/QPF5l5uZa/network?var-node={{ $labels.instance }} - name: services -- GitLab