Kubernetes API Monitoring
Kubernetes API Monitoring Rules
The Prometheus agent is configured to report numerous Kubernetes (K8s) metrics. Below is the YAML rule set that Catapult uses. Each rule defines an alert name, a PromQL expression, a `for` duration (how long the condition must hold before the alert fires), labels, and annotations, which contain the alert's summary and description.
x
bash-5.0# cat k8s.yml
# Prometheus alerting rules for the Kubernetes control plane and kube-proxy.
#
# Conventions used by every rule in this group:
#   * `expr` is evaluated with `offset 5m`, i.e. against samples from five
#     minutes before evaluation time.
#     NOTE(review): presumably this tolerates delayed metric delivery from the
#     agent -- confirm; it also delays every alert by five minutes.
#   * `for` is how long the condition must hold before the alert fires.
#   * `summary` is the short, cluster-level text; `description` is the longer
#     text that also names the host.
groups:
  - name: kube-apiserver
    rules:
      #----------------------------------- API server -------------------------------------
      # The kube-apiserver scrape target reported up == 0.
      - alert: KubeAPIServerDown
        expr: up{job="kube-apiserver"} offset 5m == 0
        for: 1m
        labels:
          severity: critical
          type: k8s
        annotations:
          # NOTE(review): the source listing had these two texts under the
          # opposite keys; aligned with the other rules in this group.
          summary: "Kubernetes API server down on cluster {{ $labels.cluster }}"
          description: "Cluster {{ $labels.cluster }}: Kube api server down on host {{ $labels.host }}"

      # More than 3% of API server requests (per cluster/host) returned a
      # 4xx or 5xx status code over a 1m rate window.
      - alert: KubernetesApiServerErrors
        expr: >-
          (
            sum(rate(apiserver_request_total{code=~"(4|5).."}[1m] offset 5m)) by (cluster, host)
            /
            sum(rate(apiserver_request_total[1m] offset 5m)) by (cluster, host)
          ) * 100 > 3
        for: 2m
        labels:
          severity: critical
          type: k8s
        annotations:
          summary: "Kubernetes API server errors on cluster {{ $labels.cluster }}"
          description: "Cluster {{ $labels.cluster }}: Kubernetes API server is experiencing high error rate on host {{ $labels.host }}"

      # More than 1% of REST client requests (per cluster/host) returned a
      # 4xx or 5xx status code over a 1m rate window.
      - alert: KubernetesApiClientErrors
        expr: >-
          (
            sum(rate(rest_client_requests_total{code=~"(4|5).."}[1m] offset 5m)) by (cluster, host)
            /
            sum(rate(rest_client_requests_total[1m] offset 5m)) by (cluster, host)
          ) * 100 > 1
        for: 2m
        labels:
          severity: critical
          type: k8s
        annotations:
          summary: "Kubernetes API client errors on cluster {{ $labels.cluster }}"
          description: "Cluster {{ $labels.cluster }}: Kubernetes API client is experiencing high error rate on host {{ $labels.host }}"

      #----------------------------------- Scheduler -------------------------------------
      # The kube-scheduler scrape target reported up == 0.
      - alert: KubeSchedulerDown
        expr: up{job="kube-scheduler"} offset 5m == 0
        for: 1m
        labels:
          severity: critical
          type: k8s
        annotations:
          summary: "Kubernetes Scheduler down on cluster {{ $labels.cluster }}"
          description: "Cluster {{ $labels.cluster }}: Kube scheduler down on host {{ $labels.host }}"

      #----------------------------------- Controller -------------------------------------
      # The kube-controller scrape target reported up == 0.
      - alert: KubeControllerManagerDown
        expr: up{job="kube-controller"} offset 5m == 0
        for: 1m
        labels:
          severity: critical
          type: k8s
        annotations:
          summary: "Kubernetes Controller down on cluster {{ $labels.cluster }}"
          description: "Cluster {{ $labels.cluster }}: Kube controller down on host {{ $labels.host }}"

      #----------------------------------- KubeProxy -------------------------------------
      # The kube-proxy scrape target reported up == 0.
      - alert: KubeProxyDown
        expr: up{job="kube-proxy"} offset 5m == 0
        for: 1m
        labels:
          severity: critical
          type: k8s
        annotations:
          summary: "KubeProxy down on cluster {{ $labels.cluster }}"
          description: "Cluster {{ $labels.cluster }}: kube proxy down on host {{ $labels.host }}"

      # The 99th-percentile duration of kube-proxy rule synchronization
      # (per cluster/host, 5m rate window) exceeds 60 seconds.
      - alert: KubeProxyRuleSyncLatency
        expr: >-
          histogram_quantile(
            0.99,
            sum by (le, cluster, host) (
              rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket[5m] offset 5m)
            )
          ) > 60
        for: 2m
        labels:
          severity: warning
          type: k8s
        annotations:
          summary: "Cluster {{ $labels.cluster }} is taking too long, on average, to apply kubernetes service rules to iptables."
          description: "Cluster {{ $labels.cluster }}: network rules synchronization slowing down, VALUE = {{ $value }} on host {{ $labels.host }}"
bash-5.0#
Was this page helpful?