Browse Source

doc: exclude 404 errors because kubelet is generating false positives

Dmitry Verkhoturov 7 years ago
parent
commit
0929080834

+ 2 - 2
Documentation/etcd-mixin/mixin.libsonnet

@@ -151,7 +151,7 @@
           {
             alert: 'etcdHighNumberOfFailedHTTPRequests',
             expr: |||
-              sum(rate(etcd_http_failed_total{%(etcd_selector)s}[5m])) BY (method) / sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m]))
+              sum(rate(etcd_http_failed_total{%(etcd_selector)s, code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m]))
               BY (method) > 0.01
             ||| % $._config,
             'for': '10m',
@@ -165,7 +165,7 @@
           {
             alert: 'etcdHighNumberOfFailedHTTPRequests',
             expr: |||
-              sum(rate(etcd_http_failed_total{%(etcd_selector)s}[5m])) BY (method) / sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m]))
+              sum(rate(etcd_http_failed_total{%(etcd_selector)s, code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{%(etcd_selector)s}[5m]))
               BY (method) > 0.05
             ||| % $._config,
             'for': '10m',

+ 2 - 2
Documentation/op-guide/etcd3_alert.rules.yml

@@ -107,7 +107,7 @@ groups:
       message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd
         instance {{ $labels.instance }}'
     expr: |
-      sum(rate(etcd_http_failed_total{job=~".*etcd.*"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
+      sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
       BY (method) > 0.01
     for: 10m
     labels:
@@ -117,7 +117,7 @@ groups:
       message: '{{ $value }}% of requests for {{ $labels.method }} failed on etcd
         instance {{ $labels.instance }}.'
     expr: |
-      sum(rate(etcd_http_failed_total{job=~".*etcd.*"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
+      sum(rate(etcd_http_failed_total{job=~".*etcd.*", code!="404"}[5m])) BY (method) / sum(rate(etcd_http_received_total{job=~".*etcd.*"}[5m]))
       BY (method) > 0.05
     for: 10m
     labels: