- Added Traefik labels and routing to prometheus, grafana, loki, cadvisor - Fixed Grafana ROOT_URL to use domain-based URL (https://grafana.${DOMAIN}) - Added uptime-kuma bypass rule in Authelia (needs initial setup) - Updated all services to use traefik-network - Synced domain from kelin-hass to kelin-casa across all configs - Fixed missing tls=true label on uptime-kuma - Note: Loki is API-only service (no web UI, accessed via Grafana)
228 lines
7.6 KiB
YAML
228 lines
7.6 KiB
YAML
# Monitoring and Observability Services
|
|
# Services for monitoring your homelab infrastructure
|
|
# Place in /opt/stacks/monitoring/docker-compose.yml
|
|
|
|
# Service Access URLs:
|
|
# - Prometheus: https://prometheus.${DOMAIN} (via Traefik) or http://server-ip:9090
|
|
# - Grafana: https://grafana.${DOMAIN} (via Traefik) or http://server-ip:3000
|
|
# - Uptime Kuma: https://status.${DOMAIN}
|
|
# - Node Exporter: http://server-ip:9100/metrics
|
|
# - cAdvisor: https://cadvisor.${DOMAIN} (via Traefik) or http://server-ip:8082
|
|
# - Loki: https://loki.${DOMAIN} (via Traefik) or http://server-ip:3100 (API only, no web UI)
|
|
# NOTE: Prometheus, Grafana, Loki use ports because they need to be accessible to other services
|
|
# Traefik labels below also expose Prometheus, Grafana, cAdvisor and Loki at https://<service>.${DOMAIN}
|
|
|
|
services:
|
|
# Prometheus - Metrics collection and storage
# Access at: https://prometheus.${DOMAIN} (through Traefik, behind Authelia)
#        or: http://server-ip:9090 (published host port; does not pass through Traefik)
prometheus:
  image: prom/prometheus:v2.48.1
  container_name: prometheus
  restart: unless-stopped
  networks:
    - monitoring-network
    - homelab-network
    - traefik-network
  ports:
    - "9090:9090"
  volumes:
    # Bind-mounted config directory; must contain prometheus.yml (see --config.file)
    - ./config/prometheus:/etc/prometheus
    # Named volume holding the TSDB (see --storage.tsdb.path)
    - prometheus-data:/prometheus
  command:
    - '--config.file=/etc/prometheus/prometheus.yml'
    - '--storage.tsdb.path=/prometheus'
    # Keep 30 days of samples
    - '--storage.tsdb.retention.time=30d'
    - '--web.console.libraries=/etc/prometheus/console_libraries'
    - '--web.console.templates=/etc/prometheus/consoles'
    # NOTE(review): this turns on the HTTP reload/quit endpoints, and port 9090
    # is also published on the host above, so they are reachable without
    # Authelia - confirm this is intended.
    - '--web.enable-lifecycle'
  # Run as the host user so files in the mounted paths keep matching ownership
  user: "${PUID:-1000}:${PGID:-1000}"
  labels:
    - "homelab.category=monitoring"
    - "homelab.description=Metrics collection and time-series database"
    - "traefik.enable=true"
    # The container is attached to three networks; pin the one Traefik must
    # use to reach it, otherwise Traefik may pick an unreachable IP.
    - "traefik.docker.network=traefik-network"
    - "traefik.http.routers.prometheus.rule=Host(`prometheus.${DOMAIN}`)"
    - "traefik.http.routers.prometheus.entrypoints=websecure"
    - "traefik.http.routers.prometheus.tls=true"
    - "traefik.http.routers.prometheus.tls.certresolver=letsencrypt"
    - "traefik.http.routers.prometheus.middlewares=authelia@docker"
    - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
|
|
|
|
# Grafana - Metrics visualization
# Access at: https://grafana.${DOMAIN} (through Traefik, behind Authelia)
#        or: http://server-ip:3000 (published host port; does not pass through Traefik)
# Admin password is taken from GRAFANA_ADMIN_PASSWORD; when that variable is
# unset it falls back to "admin" - set it, or change the password on first login.
grafana:
  image: grafana/grafana:10.2.3
  container_name: grafana
  restart: unless-stopped
  networks:
    - monitoring-network
    - homelab-network
    - traefik-network
  ports:
    - "3000:3000"
  volumes:
    # Named volume for Grafana's data directory
    - grafana-data:/var/lib/grafana
    # Bind-mounted provisioning directory
    - ./config/grafana/provisioning:/etc/grafana/provisioning
  environment:
    - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin}
    - GF_USERS_ALLOW_SIGN_UP=false
    # Must match the Traefik Host() rule below
    - GF_SERVER_ROOT_URL=https://grafana.${DOMAIN}
    - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel
  # Run as the host user so files in the mounted paths keep matching ownership
  user: "${PUID:-1000}:${PGID:-1000}"
  depends_on:
    - prometheus
  labels:
    - "homelab.category=monitoring"
    - "homelab.description=Metrics visualization and dashboards"
    - "traefik.enable=true"
    # The container is attached to three networks; pin the one Traefik must
    # use to reach it, otherwise Traefik may pick an unreachable IP.
    - "traefik.docker.network=traefik-network"
    - "traefik.http.routers.grafana.rule=Host(`grafana.${DOMAIN}`)"
    - "traefik.http.routers.grafana.entrypoints=websecure"
    - "traefik.http.routers.grafana.tls=true"
    - "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
    - "traefik.http.routers.grafana.middlewares=authelia@docker"
    - "traefik.http.services.grafana.loadbalancer.server.port=3000"
|
|
|
|
# Node Exporter - exports host hardware and OS metrics
# Metrics at: http://server-ip:9100/metrics
node-exporter:
  container_name: node-exporter
  image: prom/node-exporter:v1.7.0
  restart: unless-stopped
  networks:
    - monitoring-network
  ports:
    - "9100:9100"
  # Host filesystems are mounted read-only; the --path.* flags below point
  # the exporter at these mount points.
  volumes:
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    - /:/rootfs:ro
  command:
    - "--path.procfs=/host/proc"
    - "--path.rootfs=/rootfs"
    - "--path.sysfs=/host/sys"
    # "$$" is Compose's escape for a literal "$" inside the regex
    - "--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)"
  labels:
    homelab.category: "monitoring"
    homelab.description: "Hardware and OS metrics exporter"
|
|
|
|
# cAdvisor - Container metrics exporter
# Access at: https://cadvisor.${DOMAIN} (through Traefik, behind Authelia)
#        or: http://server-ip:8082 (published host port; does not pass through Traefik)
cadvisor:
  image: gcr.io/cadvisor/cadvisor:v0.47.2
  container_name: cadvisor
  restart: unless-stopped
  networks:
    - monitoring-network
    - homelab-network
    - traefik-network
  ports:
    # Host port 8082 maps to the container's 8080
    - "8082:8080"
  # Host paths are mounted read-only
  volumes:
    - /:/rootfs:ro
    - /var/run:/var/run:ro
    - /sys:/sys:ro
    - /var/lib/docker:/var/lib/docker:ro
    - /dev/disk:/dev/disk:ro
  privileged: true
  devices:
    - /dev/kmsg
  labels:
    - "homelab.category=monitoring"
    - "homelab.description=Container metrics and performance monitoring"
    - "traefik.enable=true"
    # The container is attached to three networks; pin the one Traefik must
    # use to reach it, otherwise Traefik may pick an unreachable IP.
    - "traefik.docker.network=traefik-network"
    - "traefik.http.routers.cadvisor.rule=Host(`cadvisor.${DOMAIN}`)"
    - "traefik.http.routers.cadvisor.entrypoints=websecure"
    - "traefik.http.routers.cadvisor.tls=true"
    - "traefik.http.routers.cadvisor.tls.certresolver=letsencrypt"
    - "traefik.http.routers.cadvisor.middlewares=authelia@docker"
    # Traefik talks to the container port (8080), not the published host port
    - "traefik.http.services.cadvisor.loadbalancer.server.port=8080"
|
|
|
|
# Uptime Kuma - Uptime monitoring
# Access at: https://status.${DOMAIN}
# No host port is published; the service is reachable only through Traefik.
uptime-kuma:
  image: louislam/uptime-kuma:1
  container_name: uptime-kuma
  restart: unless-stopped
  networks:
    - monitoring-network
    - homelab-network
    - traefik-network
  volumes:
    # Named volume for application data
    - uptime-kuma-data:/app/data
    # Docker socket, mounted read-only
    - /var/run/docker.sock:/var/run/docker.sock:ro
  labels:
    - "homelab.category=monitoring"
    - "homelab.description=Service uptime monitoring and alerts"
    - "traefik.enable=true"
    # The container is attached to three networks; pin the one Traefik must
    # use to reach it, otherwise Traefik may pick an unreachable IP.
    - "traefik.docker.network=traefik-network"
    - "traefik.http.routers.uptime-kuma.rule=Host(`status.${DOMAIN}`)"
    - "traefik.http.routers.uptime-kuma.entrypoints=websecure"
    - "traefik.http.routers.uptime-kuma.tls=true"
    - "traefik.http.routers.uptime-kuma.tls.certresolver=letsencrypt"
    - "traefik.http.routers.uptime-kuma.middlewares=authelia@docker"
    - "traefik.http.services.uptime-kuma.loadbalancer.server.port=3001"
|
|
|
|
# Loki - Log aggregation
# Access at: https://loki.${DOMAIN} (through Traefik, behind Authelia)
#        or: http://server-ip:3100 (published host port; does not pass through Traefik)
loki:
  image: grafana/loki:2.9.3
  container_name: loki
  restart: unless-stopped
  networks:
    - monitoring-network
    - homelab-network
    - traefik-network
  ports:
    - "3100:3100"
  volumes:
    # Bind-mounted config directory; must contain loki-config.yml (see command)
    - ./config/loki:/etc/loki
    # Named volume mounted at /loki
    - loki-data:/loki
  command: -config.file=/etc/loki/loki-config.yml
  # Run as the host user so files in the mounted paths keep matching ownership
  user: "${PUID:-1000}:${PGID:-1000}"
  labels:
    - "homelab.category=monitoring"
    - "homelab.description=Log aggregation system"
    - "traefik.enable=true"
    # The container is attached to three networks; pin the one Traefik must
    # use to reach it, otherwise Traefik may pick an unreachable IP.
    - "traefik.docker.network=traefik-network"
    - "traefik.http.routers.loki.rule=Host(`loki.${DOMAIN}`)"
    - "traefik.http.routers.loki.entrypoints=websecure"
    - "traefik.http.routers.loki.tls=true"
    - "traefik.http.routers.loki.tls.certresolver=letsencrypt"
    - "traefik.http.routers.loki.middlewares=authelia@docker"
    - "traefik.http.services.loki.loadbalancer.server.port=3100"
|
|
|
|
# Promtail - log shipper for Loki
# Ships Docker container logs to Loki
promtail:
  container_name: promtail
  image: grafana/promtail:2.9.3
  restart: unless-stopped
  # Start after the loki service
  depends_on:
    - loki
  networks:
    - monitoring-network
  volumes:
    # Bind-mounted config directory; must contain promtail-config.yml
    - ./config/promtail:/etc/promtail
    # Host and container logs, mounted read-only
    - /var/log:/var/log:ro
    - /var/lib/docker/containers:/var/lib/docker/containers:ro
  command:
    - "-config.file=/etc/promtail/promtail-config.yml"
  labels:
    homelab.category: "monitoring"
    homelab.description: "Log collector for Loki"
|
|
|
|
# Named volumes referenced by the services above; all use the local driver.
volumes:
  prometheus-data: {driver: local}
  grafana-data: {driver: local}
  uptime-kuma-data: {driver: local}
  loki-data: {driver: local}
|
|
|
|
networks:
  # Bridge network defined by this stack
  monitoring-network: {driver: bridge}
  # Networks declared external: they must already exist before `up`
  homelab-network: {external: true}
  traefik-network: {external: true}
|