Compare commits
18 Commits
4f3f4b0487
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2281ebcb6d | ||
|
|
2942ff15bc | ||
|
|
24e80de43c | ||
|
|
cfc8b61f98 | ||
|
|
b063128049 | ||
|
|
a07adedd00 | ||
|
|
31705ad888 | ||
|
|
b5c5c11114 | ||
|
|
926766346c | ||
|
|
c736c23e9a | ||
|
|
a02f33e96e | ||
|
|
d62b627093 | ||
|
|
fb1de4f079 | ||
|
|
3bf80f6940 | ||
|
|
1c2fb3c807 | ||
|
|
b918e713e5 | ||
|
|
ac3bff9351 | ||
|
|
0088c11d5e |
92
README.md
92
README.md
@@ -330,15 +330,91 @@ or low cost, and Restic handles encryption + deduplication automatically. A cron
|
|||||||
|
|
||||||
### Recommended Alerts
|
### Recommended Alerts
|
||||||
|
|
||||||
Set these up in Grafana Cloud UI (**Alerting** -> **Alert rules**):
|
Set these up in Grafana Cloud UI (**Alerting** -> **Alert rules** -> **New alert rule**). Choose **Grafana-managed rule**
|
||||||
|
and select the appropriate data source (Prometheus or Loki).
|
||||||
|
|
||||||
| Alert | Condition | Severity |
|
| Alert | Condition | Severity |
|
||||||
|----------------------|-----------------------------------------------------------------------|----------|
|
|----------------------|--------------------------------------|----------|
|
||||||
| Disk usage high | `node_filesystem_avail_bytes` / `node_filesystem_size_bytes` < 0.2 | Critical |
|
| Disk usage high | Available disk < 20% | Critical |
|
||||||
| Container restarting | Container restart count > 3 in 10 min | Warning |
|
| Container restarting | Restart count > 3 in 10 min | Warning |
|
||||||
| High memory usage | `node_memory_MemAvailable_bytes` / `node_memory_MemTotal_bytes` < 0.1 | Warning |
|
| High memory usage | Available memory < 10% | Warning |
|
||||||
| High CPU usage | `node_cpu_seconds_total` idle < 10% sustained 5 min | Warning |
|
| High CPU usage | CPU usage > 90% sustained 5 min | Warning |
|
||||||
| Nextcloud cron stale | No log line from `nextcloud-cron` in 15 min | Warning |
|
| Nextcloud cron stale | No cron log lines in 15 min | Warning |
|
||||||
|
|
||||||
|
#### Disk usage high
|
||||||
|
|
||||||
|
Fires when the root filesystem (`/`) drops below 20% free space.
|
||||||
|
|
||||||
|
- **Data source:** Prometheus
|
||||||
|
- **Query (A):**
|
||||||
|
```promql
|
||||||
|
node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100
|
||||||
|
```
|
||||||
|
- **Expression (B):** Threshold — `A IS BELOW 20`
|
||||||
|
- **Evaluate every:** `1m`
|
||||||
|
- **Pending period (For):** `5m`
|
||||||
|
- **Labels:** `severity: critical`
|
||||||
|
|
||||||
|
#### Container restarting
|
||||||
|
|
||||||
|
Fires when any container restarts more than 3 times in 10 minutes, indicating a crash loop.
|
||||||
|
Detects both in-place restarts (`docker restart`) and ID-changing restarts (`docker compose down/up`).
|
||||||
|
Requires cAdvisor (included in the monitoring stack).
|
||||||
|
|
||||||
|
- **Data source:** Prometheus
|
||||||
|
- **Query (A):**
|
||||||
|
```promql
|
||||||
|
sum by (name) (changes(container_start_time_seconds{name!=""}[10m]))
|
||||||
|
+
|
||||||
|
count by (name) (count_over_time(container_start_time_seconds{name!=""}[10m])) - 1
|
||||||
|
```
|
||||||
|
- **Expression (B):** Threshold — `A IS ABOVE 3`
|
||||||
|
- **Evaluate every:** `1m`
|
||||||
|
- **Pending period (For):** `0s`
|
||||||
|
- **Labels:** `severity: warning`
|
||||||
|
|
||||||
|
#### High memory usage
|
||||||
|
|
||||||
|
Fires when available memory drops below 10% of total.
|
||||||
|
|
||||||
|
- **Data source:** Prometheus
|
||||||
|
- **Query (A):**
|
||||||
|
```promql
|
||||||
|
node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes * 100
|
||||||
|
```
|
||||||
|
- **Expression (B):** Threshold — `A IS BELOW 10`
|
||||||
|
- **Evaluate every:** `1m`
|
||||||
|
- **Pending period (For):** `5m`
|
||||||
|
- **Labels:** `severity: warning`
|
||||||
|
|
||||||
|
#### High CPU usage
|
||||||
|
|
||||||
|
Fires when average CPU usage exceeds 90% for 5 minutes. The query measures idle CPU time, so the threshold is expressed as idle below 10%.
|
||||||
|
|
||||||
|
- **Data source:** Prometheus
|
||||||
|
- **Query (A):**
|
||||||
|
```promql
|
||||||
|
avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100
|
||||||
|
```
|
||||||
|
- **Expression (B):** Threshold — `A IS BELOW 10`
|
||||||
|
- **Evaluate every:** `1m`
|
||||||
|
- **Pending period (For):** `5m`
|
||||||
|
- **Labels:** `severity: warning`
|
||||||
|
|
||||||
|
#### Nextcloud cron stale
|
||||||
|
|
||||||
|
Fires when no log output from the `nextcloud-cron` container appears for 15 minutes, indicating background jobs have stopped.
|
||||||
|
|
||||||
|
- **Data source:** Loki
|
||||||
|
- **Query (A):**
|
||||||
|
```logql
|
||||||
|
count_over_time({container="/nextcloud-cron"}[15m])
|
||||||
|
```
|
||||||
|
- **Expression (B):** Threshold — `A IS BELOW 1`
|
||||||
|
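- **Alert condition:** also trigger on **No Data** (when no log lines match, the query returns no series rather than `0`, so the threshold alone would not fire)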
|
||||||
|
- **Evaluate every:** `5m`
|
||||||
|
- **Pending period (For):** `0s`
|
||||||
|
- **Labels:** `severity: warning`
|
||||||
|
|
||||||
### Recommended Dashboards
|
### Recommended Dashboards
|
||||||
|
|
||||||
|
|||||||
@@ -12,6 +12,11 @@ nextcloud.t-gstone.de {
|
|||||||
reverse_proxy nextcloud-nginx:80
|
reverse_proxy nextcloud-nginx:80
|
||||||
|
|
||||||
header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
|
header Strict-Transport-Security "max-age=31536000; includeSubDomains; preload"
|
||||||
|
header Referrer-Policy "no-referrer"
|
||||||
|
header X-Content-Type-Options "nosniff"
|
||||||
|
header X-Frame-Options "SAMEORIGIN"
|
||||||
|
header X-Permitted-Cross-Domain-Policies "none"
|
||||||
|
header X-Robots-Tag "noindex, nofollow"
|
||||||
|
|
||||||
request_body {
|
request_body {
|
||||||
max_size 10G
|
max_size 10G
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ services:
|
|||||||
image: caddy:2-alpine
|
image: caddy:2-alpine
|
||||||
container_name: caddy
|
container_name: caddy
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
- alloy
|
||||||
ports:
|
ports:
|
||||||
- "80:80"
|
- "80:80"
|
||||||
- "443:443"
|
- "443:443"
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ services:
|
|||||||
image: gitea/gitea:1.25.5-rootless
|
image: gitea/gitea:1.25.5-rootless
|
||||||
container_name: gitea
|
container_name: gitea
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
- alloy
|
||||||
env_file: .env
|
env_file: .env
|
||||||
volumes:
|
volumes:
|
||||||
- ${DATA_ROOT}/gitea/data:/var/lib/gitea
|
- ${DATA_ROOT}/gitea/data:/var/lib/gitea
|
||||||
|
|||||||
@@ -54,6 +54,18 @@ prometheus.scrape "node" {
|
|||||||
scrape_interval = "60s"
|
scrape_interval = "60s"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// cAdvisor container metrics -> Grafana Cloud Prometheus
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
prometheus.scrape "cadvisor" {
|
||||||
|
targets = [{"__address__" = "cadvisor:8080"}]
|
||||||
|
forward_to = [prometheus.remote_write.grafana_cloud.receiver]
|
||||||
|
|
||||||
|
scrape_interval = "60s"
|
||||||
|
metrics_path = "/metrics"
|
||||||
|
}
|
||||||
|
|
||||||
prometheus.remote_write "grafana_cloud" {
|
prometheus.remote_write "grafana_cloud" {
|
||||||
endpoint {
|
endpoint {
|
||||||
url = env("GRAFANA_CLOUD_PROMETHEUS_URL")
|
url = env("GRAFANA_CLOUD_PROMETHEUS_URL")
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ services:
|
|||||||
- EXEC=0
|
- EXEC=0
|
||||||
- IMAGES=0
|
- IMAGES=0
|
||||||
- INFO=0
|
- INFO=0
|
||||||
- NETWORKS=0
|
- NETWORKS=1
|
||||||
- NODES=0
|
- NODES=0
|
||||||
- PLUGINS=0
|
- PLUGINS=0
|
||||||
- SERVICES=0
|
- SERVICES=0
|
||||||
@@ -33,6 +33,29 @@ services:
|
|||||||
max-size: "10m"
|
max-size: "10m"
|
||||||
max-file: "3"
|
max-file: "3"
|
||||||
|
|
||||||
|
cadvisor:
|
||||||
|
image: gcr.io/cadvisor/cadvisor:v0.54.1
|
||||||
|
container_name: cadvisor
|
||||||
|
restart: unless-stopped
|
||||||
|
privileged: true
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock:ro
|
||||||
|
- /run/containerd/containerd.sock:/run/containerd/containerd.sock:ro
|
||||||
|
- /sys:/sys:ro
|
||||||
|
- /var/lib/docker/:/var/lib/docker:ro
|
||||||
|
command:
|
||||||
|
- --docker_only=true
|
||||||
|
- --housekeeping_interval=30s
|
||||||
|
- --containerd=/run/containerd/containerd.sock
|
||||||
|
- --disable_metrics=cpu_topology,disk,diskIO,hugetlb,memory_numa,network,oom_event,percpu,perf_event,process,referenced_memory,resctrl,sched,tcp,udp
|
||||||
|
networks:
|
||||||
|
- monitoring
|
||||||
|
logging:
|
||||||
|
driver: json-file
|
||||||
|
options:
|
||||||
|
max-size: "10m"
|
||||||
|
max-file: "3"
|
||||||
|
|
||||||
alloy:
|
alloy:
|
||||||
image: grafana/alloy:v1.14.1
|
image: grafana/alloy:v1.14.1
|
||||||
container_name: alloy
|
container_name: alloy
|
||||||
|
|||||||
@@ -57,6 +57,8 @@ services:
|
|||||||
image: postgres:17-alpine
|
image: postgres:17-alpine
|
||||||
container_name: nextcloud-postgres
|
container_name: nextcloud-postgres
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
- alloy
|
||||||
env_file: .env
|
env_file: .env
|
||||||
volumes:
|
volumes:
|
||||||
- ${DATA_ROOT}/nextcloud/db:/var/lib/postgresql/data
|
- ${DATA_ROOT}/nextcloud/db:/var/lib/postgresql/data
|
||||||
@@ -77,6 +79,8 @@ services:
|
|||||||
image: redis:8-alpine
|
image: redis:8-alpine
|
||||||
container_name: nextcloud-redis
|
container_name: nextcloud-redis
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
depends_on:
|
||||||
|
- alloy
|
||||||
command: redis-server --requirepass ${REDIS_PASSWORD}
|
command: redis-server --requirepass ${REDIS_PASSWORD}
|
||||||
env_file: .env
|
env_file: .env
|
||||||
networks:
|
networks:
|
||||||
|
|||||||
@@ -6,19 +6,30 @@ map $uri $nonce_uri {
|
|||||||
default "";
|
default "";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
map $arg_v $asset_immutable {
|
||||||
|
"" "";
|
||||||
|
default ", immutable";
|
||||||
|
}
|
||||||
|
|
||||||
server {
|
server {
|
||||||
listen 80;
|
listen 80;
|
||||||
server_name _;
|
server_name _;
|
||||||
|
|
||||||
client_max_body_size 10G;
|
include mime.types;
|
||||||
client_body_timeout 300s;
|
types {
|
||||||
fastcgi_buffers 64 4K;
|
application/javascript mjs;
|
||||||
|
}
|
||||||
|
|
||||||
gzip on;
|
gzip on;
|
||||||
gzip_vary on;
|
gzip_vary on;
|
||||||
gzip_comp_level 4;
|
gzip_comp_level 4;
|
||||||
gzip_min_length 256;
|
gzip_min_length 256;
|
||||||
gzip_types application/javascript application/json text/css text/plain text/xml application/xml image/svg+xml;
|
gzip_proxied any;
|
||||||
|
gzip_types text/plain text/css application/json application/javascript text/xml application/xml image/svg+xml;
|
||||||
|
|
||||||
|
client_max_body_size 10G;
|
||||||
|
client_body_timeout 300s;
|
||||||
|
fastcgi_buffers 64 4K;
|
||||||
|
|
||||||
root /var/www/html;
|
root /var/www/html;
|
||||||
index index.php index.html /index.php$request_uri;
|
index index.php index.html /index.php$request_uri;
|
||||||
@@ -27,27 +38,18 @@ server {
|
|||||||
location ^~ /.well-known {
|
location ^~ /.well-known {
|
||||||
location = /.well-known/carddav { return 301 /remote.php/dav/; }
|
location = /.well-known/carddav { return 301 /remote.php/dav/; }
|
||||||
location = /.well-known/caldav { return 301 /remote.php/dav/; }
|
location = /.well-known/caldav { return 301 /remote.php/dav/; }
|
||||||
location ^~ /.well-known { return 301 /index.php$uri; }
|
location /.well-known/acme-challenge { try_files $uri $uri/ =404; }
|
||||||
|
location /.well-known/pki-validation { try_files $uri $uri/ =404; }
|
||||||
|
return 301 /index.php$request_uri;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Deny access to internal paths
|
# Deny access to internal paths
|
||||||
location ~ ^/(?:build|tests|config|lib|3rdparty|templates|data)(?:$|/) { return 404; }
|
location ~ ^/(?:build|tests|config|lib|3rdparty|templates|data)(?:$|/) { return 404; }
|
||||||
location ~ ^/(?:\.|autotest|occ|issue|indie|db_|console) { return 404; }
|
location ~ ^/(?:\.|autotest|occ|issue|indie|db_|console) { return 404; }
|
||||||
|
|
||||||
# Serve static files directly — only if file exists on disk
|
# PHP handling (must be before static file locations so that internal
|
||||||
location ~ \.(?:css|js|mjs|svg|gif|png|jpg|ico|wasm|tflite|map|ogg|flac)$ {
|
# redirects like /index.php/apps/theming/theme/dark.css match here
|
||||||
try_files $uri =404;
|
# instead of cycling back into the static file try_files)
|
||||||
expires 6M;
|
|
||||||
access_log off;
|
|
||||||
}
|
|
||||||
|
|
||||||
location ~ \.woff2?$ {
|
|
||||||
try_files $uri =404;
|
|
||||||
expires 7d;
|
|
||||||
access_log off;
|
|
||||||
}
|
|
||||||
|
|
||||||
# PHP handling
|
|
||||||
location ~ \.php(?:$|/) {
|
location ~ \.php(?:$|/) {
|
||||||
fastcgi_split_path_info ^(.+?\.php)(/.*)$;
|
fastcgi_split_path_info ^(.+?\.php)(/.*)$;
|
||||||
set $path_info $fastcgi_path_info;
|
set $path_info $fastcgi_path_info;
|
||||||
@@ -60,10 +62,24 @@ server {
|
|||||||
fastcgi_param front_controller_active true;
|
fastcgi_param front_controller_active true;
|
||||||
fastcgi_pass php-handler;
|
fastcgi_pass php-handler;
|
||||||
fastcgi_intercept_errors on;
|
fastcgi_intercept_errors on;
|
||||||
|
fastcgi_hide_header X-Powered-By;
|
||||||
fastcgi_request_buffering off;
|
fastcgi_request_buffering off;
|
||||||
fastcgi_max_temp_file_size 0;
|
fastcgi_max_temp_file_size 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Serve static files directly, fall through to PHP for dynamic assets (e.g. theming)
|
||||||
|
location ~ \.(?:css|js|mjs|svg|gif|ico|jpg|png|webp|wasm|tflite|map|ogg|flac|mp4|webm)$ {
|
||||||
|
try_files $uri /index.php$request_uri;
|
||||||
|
add_header Cache-Control "public, max-age=15778463$asset_immutable";
|
||||||
|
access_log off;
|
||||||
|
}
|
||||||
|
|
||||||
|
location ~ \.woff2?$ {
|
||||||
|
try_files $uri /index.php$request_uri;
|
||||||
|
expires 7d;
|
||||||
|
access_log off;
|
||||||
|
}
|
||||||
|
|
||||||
# Default handler — route everything else through PHP front controller
|
# Default handler — route everything else through PHP front controller
|
||||||
location / {
|
location / {
|
||||||
rewrite ^ /index.php$request_uri last;
|
rewrite ^ /index.php$request_uri last;
|
||||||
|
|||||||
Reference in New Issue
Block a user