---
# Monitoring and Observability Services
#
# Stack: Prometheus (metrics), Grafana (dashboards), Node Exporter (host
# metrics), cAdvisor (container metrics), Uptime Kuma (uptime checks),
# Loki (log storage) and Promtail (log shipping).
#
# RESTART POLICY GUIDE:
#   - unless-stopped: Core infrastructure services that should always run
#   - no: Services with Sablier lazy loading (start on-demand)
#   - See individual service comments for specific reasoning
#
# NOTE(review): several services publish host ports in addition to their
# Traefik routers. Requests to the published host ports do not pass through
# the `authelia@docker` middleware; confirm this direct access is intended.

services:
  # Prometheus - Metrics collection and storage
  # Restart: unless-stopped (core infrastructure)
  prometheus:
    image: prom/prometheus:v2.48.1
    deploy:
      resources:
        limits:
          cpus: '0.75'
          memory: 512M
          pids: 1024
        reservations:
          cpus: '0.25'
          memory: 256M
    container_name: prometheus
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - '9090:9090'
    volumes:
      - ./config/prometheus:/etc/prometheus
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      # Keep 30 days of time-series data in the prometheus-data volume.
      - '--storage.tsdb.retention.time=30d'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      - '--web.enable-lifecycle'
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Metrics collection and time-series database'
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.prometheus.rule=Host(`prometheus.${DOMAIN}`)'
      - 'traefik.http.routers.prometheus.entrypoints=websecure'
      - 'traefik.http.routers.prometheus.tls=true'
      - 'traefik.http.routers.prometheus.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.prometheus.middlewares=authelia@docker'
      - 'traefik.http.services.prometheus.loadbalancer.server.port=9090'

  # Grafana - Metrics visualization
  # Admin login: user `admin`; the password comes from GRAFANA_ADMIN_PASSWORD
  # (see GF_SECURITY_ADMIN_PASSWORD below), not the stock admin/admin default.
  # Restart: unless-stopped (core infrastructure)
  grafana:
    image: grafana/grafana:10.2.3
    deploy:
      resources:
        limits:
          cpus: '0.50'
          memory: 256M
          pids: 512
        reservations:
          cpus: '0.25'
          memory: 128M
    container_name: grafana
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - '3000:3000'
    volumes:
      - grafana-data:/var/lib/grafana
      - ./config/grafana/provisioning:/etc/grafana/provisioning
    environment:
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_SERVER_ROOT_URL=https://grafana.${DOMAIN}
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource,grafana-piechart-panel
    # Runs as UID:GID 1000:1000.
    # NOTE(review): presumably chosen to match host file ownership - confirm.
    user: '1000:1000'
    depends_on:
      - prometheus
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Metrics visualization and dashboards'
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.grafana.rule=Host(`grafana.${DOMAIN}`)'
      - 'traefik.http.routers.grafana.entrypoints=websecure'
      - 'traefik.http.routers.grafana.tls=true'
      - 'traefik.http.routers.grafana.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.grafana.middlewares=authelia@docker'
      - 'traefik.http.services.grafana.loadbalancer.server.port=3000'

  # Node Exporter - Host metrics exporter
  # Metrics at: http://192.168.4.4:9100/metrics
  # Restart: unless-stopped (core infrastructure)
  node-exporter:
    image: prom/node-exporter:v1.7.0
    container_name: node-exporter
    restart: unless-stopped
    networks:
      - homelab-network
    ports:
      - '9100:9100'
    # Host filesystems are mounted read-only; the --path.* flags below point
    # the exporter at these mount locations.
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.rootfs=/rootfs'
      - '--path.sysfs=/host/sys'
      # `$$` is the Compose escape for a literal `$`, so the exporter
      # receives the regex with a single `$` end-of-string anchor.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    labels:
      - 'homelab.category=monitoring'
      - 'homelab.description=Hardware and OS metrics exporter'

  # cAdvisor - Container metrics exporter
  # Access at: http://192.168.4.4:8082 (host 8082 -> container 8080)
  # Restart: unless-stopped (core infrastructure)
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.2
    container_name: cadvisor
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - '8082:8080'
    volumes:
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /dev/disk:/dev/disk:ro
    # Runs privileged with the host kernel log device attached.
    privileged: true
    devices:
      - /dev/kmsg
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Container metrics and performance monitoring'
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.cadvisor.rule=Host(`cadvisor.${DOMAIN}`)'
      - 'traefik.http.routers.cadvisor.entrypoints=websecure'
      - 'traefik.http.routers.cadvisor.tls=true'
      - 'traefik.http.routers.cadvisor.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.cadvisor.middlewares=authelia@docker'
      # Traefik targets the container port (8080), not the host port (8082).
      - 'traefik.http.services.cadvisor.loadbalancer.server.port=8080'

  # Uptime Kuma - Uptime monitoring
  # Restart: unless-stopped (core infrastructure)
  uptime-kuma:
    image: louislam/uptime-kuma:1
    deploy:
      resources:
        limits:
          cpus: '0.50'
          memory: 256M
          pids: 512
        reservations:
          cpus: '0.25'
          memory: 128M
    container_name: uptime-kuma
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - '3001:3001'
    volumes:
      - uptime-kuma-data:/app/data
      # Docker socket, bind-mounted read-only.
      # NOTE(review): presumably used for container status monitors - confirm.
      - /var/run/docker.sock:/var/run/docker.sock:ro
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Service uptime monitoring and alerts'
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.uptime-kuma.rule=Host(`uptime-kuma.${DOMAIN}`)'
      - 'traefik.http.routers.uptime-kuma.entrypoints=websecure'
      - 'traefik.http.routers.uptime-kuma.tls=true'
      - 'traefik.http.routers.uptime-kuma.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.uptime-kuma.middlewares=authelia@docker'
      - 'traefik.http.services.uptime-kuma.loadbalancer.server.port=3001'

  # Loki - Log aggregation
  # Access at: http://192.168.4.4:3100
  # Restart: unless-stopped (core infrastructure)
  loki:
    image: grafana/loki:2.9.3
    deploy:
      resources:
        limits:
          cpus: '0.75'
          memory: 512M
          pids: 1024
        reservations:
          cpus: '0.25'
          memory: 256M
    container_name: loki
    restart: unless-stopped
    networks:
      - homelab-network
      - traefik-network
    ports:
      - '3100:3100'
    volumes:
      - ./config/loki:/etc/loki
      - loki-data:/loki
    command: '-config.file=/etc/loki/loki-config.yml'
    labels:
      # TRAEFIK CONFIGURATION
      # ==========================================
      # Service metadata
      - 'homelab.category=monitoring'
      - 'homelab.description=Log aggregation system'
      # Traefik reverse proxy (comment/uncomment to disable/enable)
      # If Traefik is on a remote server: these labels are NOT USED;
      # configure external yml files in /traefik/dynamic folder instead.
      - 'traefik.enable=true'
      - 'traefik.docker.network=traefik-network'
      - 'traefik.http.routers.loki.rule=Host(`loki.${DOMAIN}`)'
      - 'traefik.http.routers.loki.entrypoints=websecure'
      - 'traefik.http.routers.loki.tls=true'
      - 'traefik.http.routers.loki.tls.certresolver=letsencrypt'
      - 'traefik.http.routers.loki.middlewares=authelia@docker'
      - 'traefik.http.services.loki.loadbalancer.server.port=3100'

  # Promtail - Log shipper for Loki
  # Ships Docker container logs to Loki
  # Restart: unless-stopped (core infrastructure)
  promtail:
    image: grafana/promtail:2.9.3
    container_name: promtail
    restart: unless-stopped
    networks:
      - homelab-network
    # Host log directories are mounted read-only.
    volumes:
      - ./config/promtail:/etc/promtail
      - /var/log:/var/log:ro
      - /var/lib/docker/containers:/var/lib/docker/containers:ro
    command: '-config.file=/etc/promtail/promtail-config.yml'
    depends_on:
      - loki
    labels:
      - 'homelab.category=monitoring'
      - 'homelab.description=Log collector for Loki'

# Named volumes holding persistent service data.
volumes:
  prometheus-data:
    driver: local
  grafana-data:
    driver: local
  uptime-kuma-data:
    driver: local
  loki-data:
    driver: local

# Both networks are declared external, so they must already exist before
# this stack is started.
networks:
  homelab-network:
    external: true
  traefik-network:
    external: true

x-dockge:
  urls:
    # Service URLs: direct host-port addresses, except Uptime Kuma, which
    # uses its Traefik hostname.
    - http://192.168.4.4:9090
    - http://192.168.4.4:3000
    - https://uptime-kuma.${DOMAIN}
    - http://192.168.4.4:9100/metrics
    - http://192.168.4.4:8082
    - http://192.168.4.4:3100