# Source: EZ-Homelab/docker-compose/monitoring/docker-compose.yml
# (original listing metadata: 291 lines, 9.6 KiB, YAML)

# Monitoring and Observability Services
# RESTART POLICY GUIDE:
# - unless-stopped: Core infrastructure services that should always run
# - no: Services with Sablier lazy loading (start on-demand)
# - See individual service comments for specific reasoning
services:
  # Prometheus - collects metrics and stores them as a time-series database.
  prometheus:
    image: prom/prometheus:v2.48.1
    container_name: prometheus
    # Always-on service: restarted automatically unless explicitly stopped.
    restart: unless-stopped
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # Keep 30 days of samples.
      - '--storage.tsdb.retention.time=30d'
      # The ./config/prometheus bind mount replaces /etc/prometheus inside the
      # container, which hides the console_libraries/consoles symlinks the
      # image creates there. Point at the directories the image actually ships
      # (the same paths its default CMD uses).
      - '--web.console.libraries=/usr/share/prometheus/console_libraries'
      - '--web.console.templates=/usr/share/prometheus/consoles'
      # NOTE(review): this enables the HTTP reload/quit endpoints, and port
      # 9090 is also published directly below, so they are reachable without
      # passing through the Authelia middleware - confirm this is intended.
      - '--web.enable-lifecycle'
    networks:
      - homelab-network
      - traefik-network
    ports:
      - '9090:9090'
    volumes:
      # Host-side configuration directory (prometheus.yml is read from here).
      - ./config/prometheus:/etc/prometheus
      # Named volume backing the TSDB path given in `command`.
      - prometheus-data:/prometheus
    deploy:
      resources:
        limits:
          cpus: '0.75'
          memory: 512M
          pids: 1024
        reservations:
          cpus: '0.25'
          memory: 256M
    labels:
      # Homelab service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Metrics collection and time-series database'
      # Traefik reverse proxy: comment these labels out to disable proxying.
      # If Traefik is on a remote server these labels are NOT USED;
      # configure external yml files in the /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.prometheus.rule=Host(`prometheus.${DOMAIN}`)'
      - 'traefik.http.routers.prometheus.entrypoints=websecure'
      - 'traefik.http.routers.prometheus.tls=true'
      - 'traefik.http.routers.prometheus.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.prometheus.middlewares=authelia@docker'
      - 'traefik.http.services.prometheus.loadbalancer.server.port=9090'
# Grafana - dashboards and visualization for the collected metrics.
# The admin password is injected from GRAFANA_ADMIN_PASSWORD (see environment).
# NOTE(review): Grafana's stock admin / admin login presumably only applies
# when that variable is empty - confirm it is set in the .env file.
  grafana:
    image: grafana/grafana:10.2.3
    container_name: grafana
    # Always-on service: restarted automatically unless explicitly stopped.
    restart: unless-stopped
    # Run as UID:GID 1000:1000 instead of the image's default user.
    user: '1000:1000'
    depends_on:
      - prometheus
    environment:
      - 'GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD}'
      - 'GF_USERS_ALLOW_SIGN_UP=false'
      - 'GF_SERVER_ROOT_URL=https://grafana.${DOMAIN}'
      - 'GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel'
    networks:
      - homelab-network
      - traefik-network
    ports:
      - '3000:3000'
    volumes:
      # Named volume for Grafana's data directory.
      - grafana-data:/var/lib/grafana
      # Provisioning files kept next to this compose file.
      - ./config/grafana/provisioning:/etc/grafana/provisioning
    deploy:
      resources:
        limits:
          cpus: '0.50'
          memory: 256M
          pids: 512
        reservations:
          cpus: '0.25'
          memory: 128M
    labels:
      # Homelab service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Metrics visualization and dashboards'
      # Traefik reverse proxy: comment these labels out to disable proxying.
      # If Traefik is on a remote server these labels are NOT USED;
      # configure external yml files in the /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.grafana.rule=Host(`grafana.${DOMAIN}`)'
      - 'traefik.http.routers.grafana.entrypoints=websecure'
      - 'traefik.http.routers.grafana.tls=true'
      - 'traefik.http.routers.grafana.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.grafana.middlewares=authelia@docker'
      - 'traefik.http.services.grafana.loadbalancer.server.port=3000'
# Node Exporter - exports hardware and OS metrics of the host.
# Metrics endpoint: http://192.168.4.4:9100/metrics
  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    # Always-on service: restarted automatically unless explicitly stopped.
    restart: unless-stopped
    command:
      # Read host data from the read-only mounts declared under `volumes`.
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `$$` is Compose's escape for a literal `$` in the regular expression.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    networks:
      - homelab-network
    ports:
      - '9100:9100'
    volumes:
      # Host filesystems, all mounted read-only.
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    labels:
      # Homelab service metadata (no Traefik route is defined for this service)
      - 'homelab.category=monitoring'
      - 'homelab.description=Hardware and OS metrics exporter'
# cAdvisor - exports per-container resource usage and performance metrics.
# Direct access: http://192.168.4.4:8082
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.2
    container_name: cadvisor
    # Always-on service: restarted automatically unless explicitly stopped.
    restart: unless-stopped
    # Runs privileged with access to the kernel message device.
    privileged: true
    devices:
      - /dev/kmsg
    networks:
      - homelab-network
      - traefik-network
    ports:
      # Host port 8082 maps to the container's port 8080.
      - '8082:8080'
    volumes:
      # Host paths, all mounted read-only.
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    labels:
      # Homelab service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Container metrics and performance monitoring'
      # Traefik reverse proxy: comment these labels out to disable proxying.
      # If Traefik is on a remote server these labels are NOT USED;
      # configure external yml files in the /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.cadvisor.rule=Host(`cadvisor.${DOMAIN}`)'
      - 'traefik.http.routers.cadvisor.entrypoints=websecure'
      - 'traefik.http.routers.cadvisor.tls=true'
      - 'traefik.http.routers.cadvisor.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.cadvisor.middlewares=authelia@docker'
      # Traefik targets the container port (8080), not the published 8082.
      - 'traefik.http.services.cadvisor.loadbalancer.server.port=8080'
# Uptime Kuma - service uptime monitoring and alerts.
  uptime-kuma:
    image: louislam/uptime-kuma:1
    container_name: uptime-kuma
    # Always-on service: restarted automatically unless explicitly stopped.
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - '3001:3001'
    volumes:
      # Named volume for the application's data directory.
      - uptime-kuma-data:/app/data
      # Docker socket, mounted read-only.
      # NOTE(review): presumably used for container status checks - confirm.
      - /var/run/docker.sock:/var/run/docker.sock:ro
    deploy:
      resources:
        limits:
          cpus: '0.50'
          memory: 256M
          pids: 512
        reservations:
          cpus: '0.25'
          memory: 128M
    labels:
      # Homelab service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Service uptime monitoring and alerts'
      # Traefik reverse proxy: comment these labels out to disable proxying.
      # If Traefik is on a remote server these labels are NOT USED;
      # configure external yml files in the /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.uptime-kuma.rule=Host(`uptime-kuma.${DOMAIN}`)'
      - 'traefik.http.routers.uptime-kuma.entrypoints=websecure'
      - 'traefik.http.routers.uptime-kuma.tls=true'
      - 'traefik.http.routers.uptime-kuma.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.uptime-kuma.middlewares=authelia@docker'
      - 'traefik.http.services.uptime-kuma.loadbalancer.server.port=3001'
# Loki - log aggregation system.
# Direct access: http://192.168.4.4:3100
  loki:
    image: grafana/loki:2.9.3
    container_name: loki
    # Always-on service: restarted automatically unless explicitly stopped.
    restart: unless-stopped
    # Configuration file comes from the ./config/loki bind mount below.
    command: '-config.file=/etc/loki/loki-config.yml'
    networks:
      - homelab-network
      - traefik-network
    ports:
      - '3100:3100'
    volumes:
      # Host-side configuration directory.
      - ./config/loki:/etc/loki
      # Named volume mounted at /loki.
      - loki-data:/loki
    deploy:
      resources:
        limits:
          cpus: '0.75'
          memory: 512M
          pids: 1024
        reservations:
          cpus: '0.25'
          memory: 256M
    labels:
      # Homelab service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Log aggregation system'
      # Traefik reverse proxy: comment these labels out to disable proxying.
      # If Traefik is on a remote server these labels are NOT USED;
      # configure external yml files in the /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.loki.rule=Host(`loki.${DOMAIN}`)'
      - 'traefik.http.routers.loki.entrypoints=websecure'
      - 'traefik.http.routers.loki.tls=true'
      - 'traefik.http.routers.loki.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.loki.middlewares=authelia@docker'
      - 'traefik.http.services.loki.loadbalancer.server.port=3100'
# Promtail - log collector that ships Docker container logs to Loki.
  promtail:
    image: grafana/promtail:2.9.3
    container_name: promtail
    # Always-on service: restarted automatically unless explicitly stopped.
    restart: unless-stopped
    # Started after the loki service.
    depends_on:
      - loki
    # Configuration file comes from the ./config/promtail bind mount below.
    command: '-config.file=/etc/promtail/promtail-config.yml'
    networks:
      - homelab-network
    volumes:
      # Host-side configuration directory.
      - ./config/promtail:/etc/promtail
      # Log sources on the host, mounted read-only.
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    labels:
      # Homelab service metadata (no Traefik route is defined for this service)
      - 'homelab.category=monitoring'
      - 'homelab.description=Log collector for Loki'
# Named volumes referenced by the services above; all use the local driver.
volumes:
  # Prometheus TSDB (/prometheus)
  prometheus-data:
    driver: local
  # Grafana data directory (/var/lib/grafana)
  grafana-data:
    driver: local
  # Uptime Kuma application data (/app/data)
  uptime-kuma-data:
    driver: local
  # Loki data (/loki)
  loki-data:
    driver: local
# Both networks are external: they must already exist and are not created
# by this compose file.
networks:
  # Shared network joined by every service in this stack.
  homelab-network:
    external: true
  # Network named in the `traefik.docker.network` labels.
  traefik-network:
    external: true
# Compose extension field (x- prefix) listing the stack's access URLs.
# NOTE(review): presumably rendered as quick links by Dockge - confirm.
x-dockge:
  urls:
    # Only the Uptime Kuma entry goes through Traefik; every other entry is a
    # direct host:port URL matching a published port of a service above.
    - 'http://192.168.4.4:9090'  # Prometheus (direct)
    - 'http://192.168.4.4:3000'  # Grafana (direct)
    - 'https://uptime-kuma.${DOMAIN}'  # Uptime Kuma (proxied through Traefik)
    - 'http://192.168.4.4:9100/metrics'  # Node Exporter metrics (direct)
    - 'http://192.168.4.4:8082'  # cAdvisor (direct)
    - 'http://192.168.4.4:3100'  # Loki (direct)