Redis 监控以及告警rules

Redis 监控

1 Exporter 以及Service(服务)部署

1.编写 exporter 和 SVC

# Deployment that runs one redis_exporter pod listening on container port 9121.
apiVersion: apps/v1
kind: Deployment
metadata:
  labels:
    app: redis-standalone-exporter
  name: redis-standalone-exporter
spec:
  replicas: 1
  selector:
    matchLabels:
      app: redis-standalone-exporter
  template:
    metadata:
      labels:
        app: redis-standalone-exporter
    spec:
      containers:
      - name: redis-standalone-exporter
        image: oliver006/redis_exporter:v1.50.0
        imagePullPolicy: IfNotPresent
        ports:
        - containerPort: 9121
        # Redis connection settings (address, password, ...) are passed as
        # command-line flags to the exporter.
        # NOTE(review): the password is stored here in plain text; consider
        # sourcing it from a Secret instead.
        args:
        - "-redis.addr"
        - "redis://10.111.143.64:6379"
        - "-redis.password"
        - "12345"
        # When the monitored Redis runs inside the Kubernetes cluster, point
        # redis.addr at its DNS name instead, as in the commented example below.
        # args: ["-redis.addr", "redis-standalone.monitorsoftware:6379", "-redis.password", "admin@123"]

# SVC 
---
# ClusterIP Service in front of the exporter pods (selected by the app label).
# The port is named "metrics" and forwards 9121 -> 9121 over TCP.
apiVersion: v1
kind: Service
metadata:
  name: redis-standalone-exporter
  labels:
    app: redis-standalone-exporter
spec:
  type: ClusterIP
  selector:
    app: redis-standalone-exporter
  ports:
  - name: metrics
    protocol: TCP
    port: 9121
    targetPort: 9121

2. 编写 ServiceMonitor 配置文件

# SM 服务发现
---
# ServiceMonitor that selects Services labelled app=redis-standalone-exporter
# in the "default" namespace and scrapes their "metrics" port at /metrics.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app: redis-standalone-exporter
    prometheus: k8s
  name: redis-standalone-exporter
  namespace: monitoring
spec:
  endpoints:
    # Scrape interval. The field is a duration string, so the unit suffix is
    # required ("15s"); a bare integer is not a valid value.
    - interval: 15s
      port: metrics
      path: /metrics
      params:
        target:
          # Address of the monitored Redis instance.
          - 10.111.143.64:6379
      relabelings:
        # Expose the Redis address (the "target" URL parameter) as the
        # "instance" label on the scraped series.
        - sourceLabels: [__param_target]
          targetLabel: instance
  namespaceSelector:
    matchNames:
      - default
  selector:
    matchLabels:
      app: redis-standalone-exporter

查看监控 targets

查看对应 promQL 语句

redis 告警规则

# 配置redis报警规则

[root@Prometheus-Grafana rules]# pwd
/prometheus/rules

[root@Prometheus-Grafana rules]# vim redis-exporter.rules
# Prometheus alerting rules for Redis (metrics provided by redis_exporter).
# A rules file must contain a single top-level "groups" key.
groups:
- name: redis集群预警
  rules:
    # Exporter scrape target is down. The selector only matches series whose
    # "instance" label ends in ":9121".
    - alert: redis节点下线
      expr: 'up{instance=~".*:9121"} == 0'
      for: 5s
      labels:
        severity: critical
      annotations:
        message: '{{ $labels.instance }} redis 监控主节点下线, 请及时处理'
        summary: '{{ $labels.instance }} redis 监控主节点下线'

    # Exporter is reachable but reports that Redis itself is down.
    - alert: redis服务下线
      expr: 'redis_up{instance=~".*"} == 0'
      for: 20s
      labels:
        severity: WARN
      annotations:
        message: '{{ $labels.instance }} redis 服务下线, 请及时处理'
        summary: '{{ $labels.instance }} redis 服务下线'

    # Last successful RDB save is more than 24 hours old.
    - alert: redis已经24小时未备份
      expr: time() - redis_rdb_last_save_timestamp_seconds > 60 * 60 * 24
      for: 5m
      labels:
        severity: ERROR
      annotations:
        description: 'redis 集群节点: {{$labels.instance}}  redis 已经 24 小时未备份, 请立即处理'
        summary: 'Missing backup (instance {{ $labels.instance }})'

    # Fragmentation ratio below 1.
    - alert: redis内存可用内存不足
      expr: redis_mem_fragmentation_ratio < 1
      for: 5m
      labels:
        severity: WARN
      annotations:
        description: 'Redis 当前节点 {{ $labels.instance }} redis内存可用内存不足,请减少key或增加内存'

    # Fragmentation ratio above 18.
    - alert: redis内存碎片过大
      expr: redis_mem_fragmentation_ratio > 18
      for: 5m
      labels:
        severity: ERROR
      annotations:
        description: 'Redis 当前节点 {{ $labels.instance }} 内存碎片过大, 当前: {{ $value}}'

    # At least one connection was rejected during the last minute.
    - alert: redis连接被拒绝
      expr: 'increase(redis_rejected_connections_total[1m]) > 0'
      for: 5m
      labels:
        alert_type: 连接被拒绝
        severity: WARN
      annotations:
        description: 'redis 服务连接 {{ $labels.instance }} 被拒绝'

    # NOTE(review): redis_instance_info looks like an info-style metric whose
    # value is constant, in which case "== 0" can never match - confirm, and
    # consider a count()-based expression instead.
    - alert: redis主节点缺失
      expr: redis_instance_info{role="master"} == 0
      for: 5m
      labels:
        severity: WARN
      annotations:
        summary: "{{ $labels.instance }} redis主节点缺失"
        description: "{{ $labels.instance }} 主节点丢失5分钟"

    # Number of connected replicas decreased within the last minute.
    - alert: redis副本下线
      expr: delta(redis_connected_slaves[1m]) < 0
      for: 5m
      labels:
        severity: WARN
      annotations:
        description: "redis {{ $labels.instance}} 集群副本下线, 请立即处理"

    # Connected clients above 85% of the configured maxclients.
    - alert: redis连接总数达到总量的85%
      expr: redis_connected_clients > redis_config_maxclients * 0.85
      for: 5m
      labels:
        severity: WARN
      annotations:
        description: "主机: {{ $labels.instance }} 当前连接数: {{ $value }}, 连接总数达到总量的85%"

    # Connected clients above 95% of the configured maxclients.
    - alert: redis连接总数达到总量的95%
      expr: redis_connected_clients > redis_config_maxclients * 0.95
      for: 5m
      labels:
        severity: ERROR
      annotations:
        description: "主机: {{ $labels.instance }} 当前连接数: {{ $value }}, 连接总数达到总量的95%"

    # No clients connected at all. The alert_host label is the instance label
    # with everything from the first ":" onwards removed.
    - alert: redis连接数过低
      expr: redis_connected_clients == 0
      for: 5m
      labels:
        severity: WARN
        alert_type: "连接数过低"
        alert_host: "{{ reReplaceAll \":(.*)\" \"\" $labels.instance }}"
      annotations:
        description: "redis 当前: {{ $labels.instance }} 节点无连接"

    # NOTE(review): only series with job="redis-sentinel" are selected, and
    # irate() is applied to what appears to be a gauge - confirm both match
    # the actual scrape configuration and intent.
    # Annotations use the "instance" label: alert_host is not a label of the
    # queried series, so referencing it would render an empty string.
    - alert: redis连接故障
      expr: irate(redis_blocked_clients{job="redis-sentinel"}[5m]) > 3
      for: 5m
      labels:
        severity: WARN
      annotations:
        description: "当前: {{ $labels.instance }} 5分钟内阻塞进程大于 3, 请检查连接服务是否异常"

    # Low keyspace hit ratio: fire when hits / (hits + misses) drops BELOW the
    # threshold (the comparison was previously inverted and fired on a high
    # hit ratio). Tune the 0.95 threshold to the workload.
    - alert: redis低命中率效率低下
      expr: redis_keyspace_hits_total / (redis_keyspace_hits_total + redis_keyspace_misses_total) < 0.95
      for: 5m
      labels:
        severity: ERROR
      annotations:
        description: "当前: {{ $labels.instance }} 命中率低下原因: 数据到期和分配给Redis的内存不足,请及时检查内存、数据"

    # NOTE(review): irate(...) == 1 is an exact-equality match on a rate -
    # confirm this expression expresses the intended condition.
    - alert: redis异常同步
      expr: irate(redis_rdb_changes_since_last_save[60m]) == 1
      for: 60m
      labels:
        severity: ERROR
      annotations:
        description: "当前: {{ $labels.instance }} redis 某一台服务异常断开, 同步异常"

    # Replication link from a replica to its master is down.
    - alert: redis集群连接异常
      expr: redis_master_link_up{master_host=~".*"} == 0
      for: 5m
      labels:
        severity: WARN
      annotations:
        description: "当前: {{ $labels.instance }} redis 复制连接当前断开"

  # Disabled: the redis_total_system_memory_bytes metric is missing.
  # - alert: "内存使用大于95%"
  #   expr: redis_memory_used_bytes / redis_total_system_memory_bytes * 100 > 95
  #   for: 5m
  #   labels:
  #     severity: WARN
  #   annotations:
  #     description: "Redis 当前节点 {{ $labels.instance }} 内存已使用 {{ $value }}%"
暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇