Files
EZ-Homelab/docker-compose/monitoring/docker-compose.yml
EZ-Homelab c5d54c6bc7 Fix variable substitution patterns: remove :-default fallbacks
- Replace all ${VARIABLE:-default} with ${VARIABLE} in compose files
- Ensure explicit variable requirements without default values
- Updated 10 docker-compose.yml files across all stacks
- Made reset-ondemand-services.sh executable
2026-01-24 15:20:31 -05:00

299 lines
10 KiB
YAML

# Monitoring and Observability Services
# Services for monitoring your homelab infrastructure
# Place in /opt/stacks/monitoring/docker-compose.yml
# RESTART POLICY GUIDE:
# - unless-stopped: Core infrastructure services that should always run
# - no: Services with Sablier lazy loading (start on-demand)
# - See individual service comments for specific reasoning
# Service Access URLs:
# - Prometheus: http://server-ip:9090 (or configure Traefik)
# - Grafana: http://server-ip:3000 (or configure Traefik)
# - Uptime Kuma: https://status.${DOMAIN}
# - Node Exporter: http://server-ip:9100/metrics
# - cAdvisor: http://server-ip:8082
# - Loki: http://server-ip:3100
# NOTE: Prometheus, Grafana, Loki use ports because they need to be accessible to other services
# Add Traefik labels if you want https://prometheus.${DOMAIN} access
services:
# Prometheus - Metrics collection and storage
# Access at: http://server-ip:9090
prometheus:
image: prom/prometheus:v2.48.1
deploy:
resources:
limits:
cpus: '0.75'
memory: 512M
pids: 1024
reservations:
cpus: '0.25'
memory: 256M
container_name: prometheus
restart: unless-stopped
networks:
- monitoring-network
- homelab-network
- traefik-network
ports:
- "9090:9090"
volumes:
- ./config/prometheus:/etc/prometheus
- prometheus-data:/prometheus
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
- '--storage.tsdb.retention.time=30d'
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--web.enable-lifecycle'
labels:
# TRAEFIK CONFIGURATION
# ==========================================
# Service metadata
- "homelab.category=monitoring"
- "homelab.description=Metrics collection and time-series database"
# Traefik reverse proxy (comment/uncomment to disable/enable)
# If Traefik is on a remote server: these labels are NOT USED;
# configure external yml files in /traefik/dynamic folder instead.
- "traefik.enable=true"
- "traefik.http.routers.prometheus.rule=Host(`prometheus.${DOMAIN}`)"
- "traefik.http.routers.prometheus.entrypoints=websecure"
- "traefik.http.routers.prometheus.tls=true"
- "traefik.http.routers.prometheus.tls.certresolver=letsencrypt"
- "traefik.http.routers.prometheus.middlewares=authelia@docker"
- "traefik.http.services.prometheus.loadbalancer.server.port=9090"
# Grafana - Metrics visualization
# Access at: http://server-ip:3000
# Default credentials: admin / admin (change on first login)
grafana:
image: grafana/grafana:10.2.3
deploy:
resources:
limits:
cpus: '0.50'
memory: 256M
pids: 512
reservations:
cpus: '0.25'
memory: 128M
container_name: grafana
restart: unless-stopped
networks:
- monitoring-network
- homelab-network
- traefik-network
ports:
- "3000:3000"
volumes:
- grafana-data:/var/lib/grafana
- ./config/grafana/provisioning:/etc/grafana/provisioning
environment:
- GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD}
- GF_USERS_ALLOW_SIGN_UP=false
- GF_SERVER_ROOT_URL=https://grafana.${DOMAIN}
- GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel
user: "${PUID}:${PGID}"
depends_on:
- prometheus
labels:
# TRAEFIK CONFIGURATION
# ==========================================
# Service metadata
- "homelab.category=monitoring"
- "homelab.description=Metrics visualization and dashboards"
# Traefik reverse proxy (comment/uncomment to disable/enable)
# If Traefik is on a remote server: these labels are NOT USED;
# configure external yml files in /traefik/dynamic folder instead.
- "traefik.enable=true"
- "traefik.http.routers.grafana.rule=Host(`grafana.${DOMAIN}`)"
- "traefik.http.routers.grafana.entrypoints=websecure"
- "traefik.http.routers.grafana.tls=true"
- "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
- "traefik.http.routers.grafana.middlewares=authelia@docker"
- "traefik.http.services.grafana.loadbalancer.server.port=3000"
# Node Exporter - Host metrics exporter
# Metrics at: http://server-ip:9100/metrics
node-exporter:
image: prom/node-exporter:v1.7.0
container_name: node-exporter
restart: unless-stopped
networks:
- monitoring-network
ports:
- "9100:9100"
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
command:
- '--path.procfs=/host/proc'
- '--path.rootfs=/rootfs'
- '--path.sysfs=/host/sys'
- '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
labels:
- "homelab.category=monitoring"
- "homelab.description=Hardware and OS metrics exporter"
# cAdvisor - Container metrics exporter
# Access at: http://server-ip:8082
cadvisor:
image: gcr.io/cadvisor/cadvisor:v0.47.2
container_name: cadvisor
restart: unless-stopped
networks:
- monitoring-network
- homelab-network
- traefik-network
ports:
- "8082:8080"
volumes:
- /:/rootfs:ro
- /var/run:/var/run:ro
- /sys:/sys:ro
- /var/lib/docker:/var/lib/docker:ro
- /dev/disk:/dev/disk:ro
privileged: true
devices:
- /dev/kmsg
labels:
# TRAEFIK CONFIGURATION
# ==========================================
# Service metadata
- "homelab.category=monitoring"
- "homelab.description=Container metrics and performance monitoring"
# Traefik reverse proxy (comment/uncomment to disable/enable)
# If Traefik is on a remote server: these labels are NOT USED;
# configure external yml files in /traefik/dynamic folder instead.
- "traefik.enable=true"
- "traefik.http.routers.cadvisor.rule=Host(`cadvisor.${DOMAIN}`)"
- "traefik.http.routers.cadvisor.entrypoints=websecure"
- "traefik.http.routers.cadvisor.tls=true"
- "traefik.http.routers.cadvisor.tls.certresolver=letsencrypt"
- "traefik.http.routers.cadvisor.middlewares=authelia@docker"
- "traefik.http.services.cadvisor.loadbalancer.server.port=8080"
# Uptime Kuma - Uptime monitoring
# Access at: https://uptime-kuma.${DOMAIN}
uptime-kuma:
image: louislam/uptime-kuma:1
deploy:
resources:
limits:
cpus: '0.50'
memory: 256M
pids: 512
reservations:
cpus: '0.25'
memory: 128M
container_name: uptime-kuma
restart: unless-stopped
networks:
- monitoring-network
- homelab-network
- traefik-network
ports:
- "3001:3001"
volumes:
- uptime-kuma-data:/app/data
- /var/run/docker.sock:/var/run/docker.sock:ro
labels:
# TRAEFIK CONFIGURATION
# ==========================================
# Service metadata
- "homelab.category=monitoring"
- "homelab.description=Service uptime monitoring and alerts"
# Traefik reverse proxy (comment/uncomment to disable/enable)
# If Traefik is on a remote server: these labels are NOT USED;
# configure external yml files in /traefik/dynamic folder instead.
- "traefik.enable=true"
- "traefik.http.routers.uptime-kuma.rule=Host(`uptime-kuma.${DOMAIN}`)"
- "traefik.http.routers.uptime-kuma.entrypoints=websecure"
- "traefik.http.routers.uptime-kuma.tls=true"
- "traefik.http.routers.uptime-kuma.tls.certresolver=letsencrypt"
- "traefik.http.routers.uptime-kuma.middlewares=authelia@docker"
- "traefik.http.services.uptime-kuma.loadbalancer.server.port=3001"
# Loki - Log aggregation
# Access at: http://server-ip:3100
loki:
image: grafana/loki:2.9.3
deploy:
resources:
limits:
cpus: '0.75'
memory: 512M
pids: 1024
reservations:
cpus: '0.25'
memory: 256M
container_name: loki
restart: unless-stopped
networks:
- monitoring-network
- homelab-network
- traefik-network
ports:
- "3100:3100"
volumes:
- ./config/loki:/etc/loki
- loki-data:/loki
command: -config.file=/etc/loki/loki-config.yml
labels:
# TRAEFIK CONFIGURATION
# ==========================================
# Service metadata
- "homelab.category=monitoring"
- "homelab.description=Log aggregation system"
# Traefik reverse proxy (comment/uncomment to disable/enable)
# If Traefik is on a remote server: these labels are NOT USED;
# configure external yml files in /traefik/dynamic folder instead.
- "traefik.enable=true"
- "traefik.http.routers.loki.rule=Host(`loki.${DOMAIN}`)"
- "traefik.http.routers.loki.entrypoints=websecure"
- "traefik.http.routers.loki.tls=true"
- "traefik.http.routers.loki.tls.certresolver=letsencrypt"
- "traefik.http.routers.loki.middlewares=authelia@docker"
- "traefik.http.services.loki.loadbalancer.server.port=3100"
# Promtail - Log shipper for Loki
# Ships Docker container logs to Loki
promtail:
image: grafana/promtail:2.9.3
container_name: promtail
restart: unless-stopped
networks:
- monitoring-network
volumes:
- ./config/promtail:/etc/promtail
- /var/log:/var/log:ro
- /var/lib/docker/containers:/var/lib/docker/containers:ro
command: -config.file=/etc/promtail/promtail-config.yml
depends_on:
- loki
labels:
- "homelab.category=monitoring"
- "homelab.description=Log collector for Loki"
volumes:
prometheus-data:
driver: local
grafana-data:
driver: local
uptime-kuma-data:
driver: local
loki-data:
driver: local
networks:
monitoring-network:
driver: bridge
homelab-network:
external: true
traefik-network:
external: true