add cAdvisor and document detailed alert queries in README

Add cAdvisor container to the monitoring stack for container-level
metrics. Configure Alloy to scrape cAdvisor. Expand the README
Recommended Alerts section with exact PromQL/LogQL queries, thresholds,
and Grafana alert rule configuration for all five alerts.
This commit is contained in:
2026-03-22 22:51:22 +01:00
parent c736c23e9a
commit 926766346c
3 changed files with 114 additions and 8 deletions

View File

@@ -54,6 +54,18 @@ prometheus.scrape "node" {
scrape_interval = "60s"
}
// ============================================================
// cAdvisor container metrics -> Grafana Cloud Prometheus
// ============================================================
prometheus.scrape "cadvisor" {
  // Poll the cAdvisor endpoint once per minute.
  scrape_interval = "60s"
  metrics_path    = "/metrics"

  // Single static target, addressed as cadvisor:8080.
  targets = [
    {"__address__" = "cadvisor:8080"},
  ]

  // Scraped samples are handed to the grafana_cloud remote_write component.
  forward_to = [prometheus.remote_write.grafana_cloud.receiver]
}
prometheus.remote_write "grafana_cloud" {
endpoint {
url = env("GRAFANA_CLOUD_PROMETHEUS_URL")

View File

@@ -33,6 +33,27 @@ services:
max-size: "10m"
max-file: "3"
# cAdvisor service: exports per-container metrics for the monitoring stack.
cadvisor:
  image: gcr.io/cadvisor/cadvisor:v0.52.1
  container_name: cadvisor
  restart: unless-stopped
  volumes:
    - /var/run/docker.sock:/var/run/docker.sock:ro
    # NOTE(review): cAdvisor does not appear to have a flag that points it at
    # /host/proc; this mount looks unused - confirm before removing.
    - /proc:/host/proc:ro
    # cAdvisor reads cgroup data from /sys inside its own container, so the
    # host sysfs must be mounted at /sys (not /host/sys) for it to be used.
    - /sys:/sys:ro
    - /:/rootfs:ro
  command:
    # Restrict reporting to Docker containers.
    - --docker_only=true
    - --housekeeping_interval=30s
    # Turn off the metric groups listed here; groups not listed stay enabled.
    - --disable_metrics=accelerator,cpu_topology,disk,diskIO,hugetlb,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp
  networks:
    - monitoring
  logging:
    driver: json-file
    options:
      # Quoted so both values stay strings.
      max-size: "10m"
      max-file: "3"
alloy:
image: grafana/alloy:v1.14.1
container_name: alloy