## Docker 容器化部署

### 新增文件
- Dockerfile: 多阶段构建镜像,支持 Java 21
- docker-compose.yml: 完整服务编排配置
  - 基础设施: MySQL 8.0, Redis 7, Nacos 3.0
  - 监控: Prometheus, Grafana
  - 业务服务: Gateway + 9个微服务
- docker/.env: 环境变量配置
- docker/mysql/init/01-init.sql: 数据库初始化脚本

### Docker 特性
- 多阶段构建优化镜像大小
- 非 root 用户运行服务
- 健康检查配置
- 统一时区设置 (Asia/Shanghai)

## Prometheus + Grafana 监控

### Prometheus 配置
- docker/prometheus/prometheus.yml: 服务发现配置
- docker/prometheus/rules/alerts.yml: 告警规则
  - 服务可用性告警
  - JVM 内存告警
  - HTTP 请求告警
  - 数据库连接池告警
  - 系统资源告警

### Grafana 配置
- docker/grafana/provisioning/: 数据源和Dashboard自动导入
- docker/grafana/dashboards/fund-platform-dashboard.json
  - 服务概览面板
  - JVM 内存监控
  - 数据库连接池监控

### Spring Boot Actuator 集成
- pom.xml: 添加 spring-boot-starter-actuator 和 micrometer-registry-prometheus
- application-docker.yml: Prometheus 端点配置

## 服务端口规划
- Gateway: 8000
- fund-sys: 8100
- fund-cust: 8200
- fund-proj: 8300
- fund-req: 8400
- fund-exp: 8500
- fund-receipt: 8600
- fund-report: 8700
- fund-file: 8800
- Prometheus: 9090
- Grafana: 3000
- Nacos: 8848
129 lines
4.6 KiB
YAML
129 lines
4.6 KiB
YAML
# Prometheus alerting rules
# Fund service platform

groups:
# ==================== Service availability alerts ====================
- name: service_availability
  rules:
  # Fires when a scrape target has reported up == 0 for one minute.
  - alert: ServiceDown
    expr: up == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: '服务 {{ $labels.job }} 宕机'
      description: '服务 {{ $labels.instance }} 已经宕机超过 1 分钟'

  # Fires when the health-status series labelled status="DOWN" equals 1
  # for 30 seconds.
  # NOTE(review): spring_boot_health_status is not defined anywhere in this
  # file; confirm the services actually export it.
  - alert: HealthCheckFailed
    expr: spring_boot_health_status{status="DOWN"} == 1
    for: 30s
    labels:
      severity: warning
    annotations:
      summary: '服务健康检查失败'
      description: '服务 {{ $labels.application }} 健康状态为 DOWN'

# ==================== JVM memory alerts ====================
- name: jvm_memory
  rules:
  # Heap usage above 85% for five minutes.
  # Used and max bytes are summed across the `id` label (presumably the
  # per-memory-pool label; verify against the exporter) so the ratio covers
  # the whole heap instead of one pool. Series whose max is not positive are
  # dropped from the denominator so they cannot distort the ratio.
  - alert: HeapMemoryUsageHigh
    expr: |-
      sum without (id) (jvm_memory_used_bytes{area="heap"})
        / sum without (id) (jvm_memory_max_bytes{area="heap"} > 0)
        * 100 > 85
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'JVM 堆内存使用率过高'
      description: '服务 {{ $labels.application }} 堆内存使用率 {{ $value | printf "%.2f" }}%'

  # Heap usage above 95% for one minute; same whole-heap ratio as above.
  - alert: HeapMemoryCritical
    expr: |-
      sum without (id) (jvm_memory_used_bytes{area="heap"})
        / sum without (id) (jvm_memory_max_bytes{area="heap"} > 0)
        * 100 > 95
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: 'JVM 堆内存即将耗尽'
      description: '服务 {{ $labels.application }} 堆内存使用率 {{ $value | printf "%.2f" }}%,请立即处理'

  # Per-second increase of accumulated GC pause time above 0.5 over a
  # five-minute window, sustained for five minutes. Evaluated per series,
  # not summed across series.
  - alert: GCTimeTooHigh
    expr: rate(jvm_gc_pause_seconds_sum[5m]) > 0.5
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'GC 时间过长'
      description: '服务 {{ $labels.application }} GC 时间占比过高,可能影响性能'

# ==================== HTTP request alerts ====================
- name: http_requests
  rules:
  # Share of 5xx responses among all requests, per application, above 5%
  # for five minutes. The ratio is multiplied by 100 so that $value is a
  # percentage, matching the "%" suffix printed in the description.
  - alert: HighErrorRate
    expr: |-
      sum by (application) (rate(http_server_requests_seconds_count{status=~"5.."}[5m]))
        / sum by (application) (rate(http_server_requests_seconds_count[5m]))
        * 100 > 5
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'HTTP 请求错误率过高'
      description: '服务 {{ $labels.application }} 5xx 错误率 {{ $value | printf "%.2f" }}%'

  # 95th-percentile request duration, per application, above 2 seconds for
  # five minutes.
  # NOTE(review): depends on http_server_requests_seconds_bucket series
  # being exported; confirm histogram buckets are enabled in the services.
  - alert: HighResponseTime
    expr: histogram_quantile(0.95, sum(rate(http_server_requests_seconds_bucket[5m])) by (le, application)) > 2
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'HTTP 响应时间过长'
      description: '服务 {{ $labels.application }} P95 响应时间 {{ $value | printf "%.2f" }} 秒'

# ==================== Database connection pool alerts ====================
- name: database_connections
  rules:
  # Active connections above 80% of the pool maximum for five minutes.
  - alert: HikariPoolUsageHigh
    expr: (hikaricp_connections_active / hikaricp_connections_max) * 100 > 80
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: '数据库连接池使用率过高'
      description: '服务 {{ $labels.application }} 连接池使用率 {{ $value | printf "%.2f" }}%'

  # At least one pending connection request for two minutes.
  - alert: HikariPoolPending
    expr: hikaricp_connections_pending > 0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: '数据库连接池存在等待'
      description: '服务 {{ $labels.application }} 有 {{ $value }} 个连接请求在等待'

# ==================== System resource alerts ====================
- name: system_resources
  rules:
  # system_cpu_usage scaled to a percentage, above 80 for five minutes.
  - alert: HighCPUUsage
    expr: system_cpu_usage * 100 > 80
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: 'CPU 使用率过高'
      description: '服务 {{ $labels.application }} CPU 使用率 {{ $value | printf "%.2f" }}%'

  # Open file descriptors above 80% of the process limit for five minutes.
  - alert: HighFileDescriptorUsage
    expr: process_files_open_files / process_files_max_files * 100 > 80
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: '文件描述符使用率过高'
      description: '服务 {{ $labels.application }} 文件描述符使用率 {{ $value | printf "%.2f" }}%'