# Monitoring and Observability Services
# Services for monitoring your homelab infrastructure
# Place in /opt/stacks/monitoring/docker-compose.yml

# RESTART POLICY GUIDE:
# - unless-stopped: Core infrastructure services that should always run
# - no: Services with Sablier lazy loading (start on-demand)
# - Every service in this stack uses unless-stopped

# Service Access URLs:
# - Prometheus: http://${SERVER_IP}:9090
# - Grafana: http://${SERVER_IP}:3000
# - Uptime Kuma: https://uptime-kuma.${DOMAIN}
# - Node Exporter: http://${SERVER_IP}:9100/metrics
# - cAdvisor: http://${SERVER_IP}:8082
# - Loki: http://${SERVER_IP}:3100

# NOTE: Prometheus, Grafana and Loki publish host ports because they need to
# be accessible to other services. The Traefik labels on each service also
# route https://<service>.${DOMAIN} through the authelia@docker middleware;
# the published host ports are reached directly, without going through Traefik.

services:
  # Prometheus - Metrics collection and storage
  # Access at: http://${SERVER_IP}:9090 or https://prometheus.${DOMAIN}
  prometheus:
    image: prom/prometheus:v2.48.1
    deploy:
      resources:
        limits:
          cpus: '0.75'
          memory: 512M
          pids: 1024
        reservations:
          cpus: '0.25'
          memory: 256M
    container_name: prometheus
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - "9090:9090"
    volumes:
      - ./config/prometheus:/etc/prometheus
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # Keep 30 days of metrics before old blocks are dropped
      - '--storage.tsdb.retention.time=30d'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # Enables the HTTP reload/quit endpoints (/-/reload, /-/quit)
      - '--web.enable-lifecycle'
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - "homelab.category=monitoring"
      - "homelab.description=Metrics collection and time-series database"
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - "traefik.enable=true"
      - "traefik.http.routers.prometheus.rule=Host(`prometheus.${DOMAIN}`)"
      - "traefik.http.routers.prometheus.entrypoints=websecure"
      - "traefik.http.routers.prometheus.tls=true"
      - "traefik.http.routers.prometheus.tls.certresolver=letsencrypt"
      - "traefik.http.routers.prometheus.middlewares=authelia@docker"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"

  # Grafana - Metrics visualization
  # Access at: http://${SERVER_IP}:3000 or https://grafana.${DOMAIN}
  # Admin password is set from ${GRAFANA_ADMIN_PASSWORD} (see environment)
  grafana:
    image: grafana/grafana:10.2.3
    deploy:
      resources:
        limits:
          cpus: '0.50'
          memory: 256M
          pids: 512
        reservations:
          cpus: '0.25'
          memory: 128M
    container_name: grafana
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - "3000:3000"
    volumes:
      - grafana-data:/var/lib/grafana
      - ./config/grafana/provisioning:/etc/grafana/provisioning
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_SERVER_ROOT_URL=https://grafana.${DOMAIN}
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel
    # Run as the host user/group given by PUID/PGID
    user: "${PUID}:${PGID}"
    depends_on:
      - prometheus
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - "homelab.category=monitoring"
      - "homelab.description=Metrics visualization and dashboards"
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - "traefik.enable=true"
      - "traefik.http.routers.grafana.rule=Host(`grafana.${DOMAIN}`)"
      - "traefik.http.routers.grafana.entrypoints=websecure"
      - "traefik.http.routers.grafana.tls=true"
      - "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
      - "traefik.http.routers.grafana.middlewares=authelia@docker"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"

  # Node Exporter - Host metrics exporter
  # Metrics at: http://${SERVER_IP}:9100/metrics
  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    restart: unless-stopped
    networks:
      - homelab-network
    ports:
      - "9100:9100"
    # Host filesystems are mounted read-only and referenced by the
    # --path.* flags below.
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # "$$" is Compose's escape for a literal "$" (regex end-of-string)
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    labels:
      - "homelab.category=monitoring"
      - "homelab.description=Hardware and OS metrics exporter"

  # cAdvisor - Container metrics exporter
  # Access at: http://${SERVER_IP}:8082 or https://cadvisor.${DOMAIN}
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.2
    container_name: cadvisor
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      # Host port 8082 maps to cAdvisor's container port 8080
      - "8082:8080"
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    privileged: true
    devices:
      - /dev/kmsg
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - "homelab.category=monitoring"
      - "homelab.description=Container metrics and performance monitoring"
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - "traefik.enable=true"
      - "traefik.http.routers.cadvisor.rule=Host(`cadvisor.${DOMAIN}`)"
      - "traefik.http.routers.cadvisor.entrypoints=websecure"
      - "traefik.http.routers.cadvisor.tls=true"
      - "traefik.http.routers.cadvisor.tls.certresolver=letsencrypt"
      - "traefik.http.routers.cadvisor.middlewares=authelia@docker"
      # Traefik targets the container port (8080), not the host port (8082)
      - "traefik.http.services.cadvisor.loadbalancer.server.port=8080"

  # Uptime Kuma - Uptime monitoring
  # Access at: https://uptime-kuma.${DOMAIN}
  uptime-kuma:
    image: louislam/uptime-kuma:1
    deploy:
      resources:
        limits:
          cpus: '0.50'
          memory: 256M
          pids: 512
        reservations:
          cpus: '0.25'
          memory: 128M
    container_name: uptime-kuma
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - "3001:3001"
    volumes:
      - uptime-kuma-data:/app/data
      # Docker socket is mounted read-only
      - /var/run/docker.sock:/var/run/docker.sock:ro
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - "homelab.category=monitoring"
      - "homelab.description=Service uptime monitoring and alerts"
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - "traefik.enable=true"
      - "traefik.http.routers.uptime-kuma.rule=Host(`uptime-kuma.${DOMAIN}`)"
      - "traefik.http.routers.uptime-kuma.entrypoints=websecure"
      - "traefik.http.routers.uptime-kuma.tls=true"
      - "traefik.http.routers.uptime-kuma.tls.certresolver=letsencrypt"
      - "traefik.http.routers.uptime-kuma.middlewares=authelia@docker"
      - "traefik.http.services.uptime-kuma.loadbalancer.server.port=3001"

  # Loki - Log aggregation
  # Access at: http://${SERVER_IP}:3100 or https://loki.${DOMAIN}
  loki:
    image: grafana/loki:2.9.3
    deploy:
      resources:
        limits:
          cpus: '0.75'
          memory: 512M
          pids: 1024
        reservations:
          cpus: '0.25'
          memory: 256M
    container_name: loki
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - "3100:3100"
    volumes:
      - ./config/loki:/etc/loki
      - loki-data:/loki
    command: -config.file=/etc/loki/loki-config.yml
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - "homelab.category=monitoring"
      - "homelab.description=Log aggregation system"
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - "traefik.enable=true"
      - "traefik.http.routers.loki.rule=Host(`loki.${DOMAIN}`)"
      - "traefik.http.routers.loki.entrypoints=websecure"
      - "traefik.http.routers.loki.tls=true"
      - "traefik.http.routers.loki.tls.certresolver=letsencrypt"
      - "traefik.http.routers.loki.middlewares=authelia@docker"
      - "traefik.http.services.loki.loadbalancer.server.port=3100"

  # Promtail - Log shipper for Loki
  # Ships Docker container logs to Loki
  promtail:
    image: grafana/promtail:2.9.3
    container_name: promtail
    restart: unless-stopped
    networks:
      - homelab-network
    volumes:
      - ./config/promtail:/etc/promtail
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    command: -config.file=/etc/promtail/promtail-config.yml
    depends_on:
      - loki
    labels:
      - "homelab.category=monitoring"
      - "homelab.description=Log collector for Loki"

# Named volumes for service state
volumes:
  prometheus-data:
    driver: local
  grafana-data:
    driver: local
  uptime-kuma-data:
    driver: local
  loki-data:
    driver: local

# Both networks are external: they must already exist before this stack starts
networks:
  homelab-network:
    external: true
  traefik-network:
    external: true

x-dockge:
  urls:
    # Direct host-port URLs, plus the Traefik-proxied Uptime Kuma URL
    - http://${SERVER_IP}:9090
    - http://${SERVER_IP}:3000
    - https://uptime-kuma.${DOMAIN}
    - http://${SERVER_IP}:9100/metrics
    - http://${SERVER_IP}:8082
    - http://${SERVER_IP}:3100