littleshop/docker-compose.monitoring.yml
SysAdmin 68c5d2dfdf Production optimization: Docker configuration and monitoring stack
🚀 Docker Production Optimizations:
- Chiseled Ubuntu base image for minimal attack surface
- Non-root user execution with security hardening
- Read-only filesystem with targeted writable volumes
- Resource limits (1GB RAM, 1 CPU) with health checks
- Multi-stage builds optimized for caching
- Zero-downtime deployment automation

🔍 Comprehensive Monitoring Stack:
- Prometheus metrics collection with custom rules
- Grafana dashboards for application visualization
- AlertManager with email notifications for critical events
- Fluentd centralized logging with retention policies
- Node Exporter + cAdvisor for system/container metrics
- Health check endpoint (/health) for container orchestration

📋 Production Deployment Ready:
- Complete deployment scripts with backup strategy
- Environment templates for secure configuration
- Performance monitoring and alerting rules
- Enterprise-grade security and observability

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-19 12:35:41 +01:00

160 lines
5.0 KiB
YAML

# NOTE(review): Compose v2 treats the top-level `version` key as obsolete and
# ignores it; presumably kept for older docker-compose releases - confirm.
version: '3.8'
# Monitoring and observability stack for LittleShop.
# Services defined below: Prometheus (metrics), Grafana (dashboards),
# AlertManager (alerting), Fluentd (log aggregation), Node Exporter (host
# metrics) and cAdvisor (container metrics).
services:
# Prometheus for metrics collection.
# The web UI/API is published through Traefik at prometheus.silverlabs.uk.
# --web.enable-lifecycle and --web.enable-admin-api expose HTTP endpoints that
# can reload or shut down the server and snapshot or delete TSDB data, so the
# public route is protected by a Traefik basicAuth middleware. Containers on
# the `monitoring` network still reach prometheus:9090 directly.
prometheus:
  image: prom/prometheus:v2.53.0
  container_name: littleshop_prometheus
  restart: unless-stopped
  command:
    - '--config.file=/etc/prometheus/prometheus.yml'
    - '--storage.tsdb.path=/prometheus'
    - '--web.console.libraries=/etc/prometheus/console_libraries'
    - '--web.console.templates=/etc/prometheus/consoles'
    # Keep 15 days of samples on the prometheus_data volume.
    - '--storage.tsdb.retention.time=15d'
    - '--web.enable-lifecycle'
    - '--web.enable-admin-api'
  volumes:
    - ./docker/prometheus.yml:/etc/prometheus/prometheus.yml:ro
    - prometheus_data:/prometheus
  networks:
    - monitoring
    - traefik
  labels:
    - "traefik.enable=true"
    - "traefik.docker.network=traefik"
    - "traefik.http.routers.prometheus.rule=Host(`prometheus.silverlabs.uk`)"
    - "traefik.http.routers.prometheus.entrypoints=websecure"
    - "traefik.http.routers.prometheus.tls=true"
    - "traefik.http.routers.prometheus.tls.certresolver=letsencrypt"
    # Require HTTP basic auth on the public route.
    - "traefik.http.routers.prometheus.middlewares=prometheus-auth"
    # PROMETHEUS_BASIC_AUTH_USERS: comma-separated htpasswd entries
    # (user:hash). Compose aborts with the message below when it is unset.
    # Single-quote the value in a .env file so `$` in the hash stays literal.
    - "traefik.http.middlewares.prometheus-auth.basicauth.users=${PROMETHEUS_BASIC_AUTH_USERS:?PROMETHEUS_BASIC_AUTH_USERS must be set to htpasswd entries}"
    - "traefik.http.services.prometheus.loadbalancer.server.port=9090"
# Grafana for visualization.
# Published through Traefik at grafana.silverlabs.uk, so the admin password
# has no fallback: Compose refuses to start the stack until
# GRAFANA_ADMIN_PASSWORD is provided (previously it defaulted to "admin").
grafana:
  image: grafana/grafana:11.0.0
  container_name: littleshop_grafana
  restart: unless-stopped
  environment:
    GF_SECURITY_ADMIN_USER: "${GRAFANA_ADMIN_USER:-admin}"
    GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set}"
    GF_USERS_ALLOW_SIGN_UP: "false"
    GF_SECURITY_DISABLE_GRAVATAR: "true"
    GF_ANALYTICS_REPORTING_ENABLED: "false"
    GF_ANALYTICS_CHECK_FOR_UPDATES: "false"
    # NOTE(review): grafana-simple-json-datasource looks like a legacy
    # (Angular-based) plugin - confirm it still loads on Grafana 11.
    GF_INSTALL_PLUGINS: "grafana-clock-panel,grafana-simple-json-datasource"
  volumes:
    - grafana_data:/var/lib/grafana
    # Provisioning config and dashboard files are mounted read-only from the repo.
    - ./docker/grafana/provisioning:/etc/grafana/provisioning:ro
    - ./docker/grafana/dashboards:/var/lib/grafana/dashboards:ro
  networks:
    - monitoring
    - traefik
  depends_on:
    - prometheus
  labels:
    - "traefik.enable=true"
    - "traefik.docker.network=traefik"
    - "traefik.http.routers.grafana.rule=Host(`grafana.silverlabs.uk`)"
    - "traefik.http.routers.grafana.entrypoints=websecure"
    - "traefik.http.routers.grafana.tls=true"
    - "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
    - "traefik.http.services.grafana.loadbalancer.server.port=3000"
# AlertManager for alerting.
# Published through Traefik at alerts.silverlabs.uk. AlertManager's UI/API
# lets any caller view alerts and create silences, so the public route is
# protected by a Traefik basicAuth middleware. Containers on the `monitoring`
# network still reach alertmanager:9093 directly.
alertmanager:
  image: prom/alertmanager:v0.27.0
  container_name: littleshop_alertmanager
  restart: unless-stopped
  command:
    - '--config.file=/etc/alertmanager/alertmanager.yml'
    - '--storage.path=/alertmanager'
    # Must match the public Traefik host below so generated links resolve.
    - '--web.external-url=https://alerts.silverlabs.uk'
  volumes:
    - ./docker/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
    - alertmanager_data:/alertmanager
  networks:
    - monitoring
    - traefik
  labels:
    - "traefik.enable=true"
    - "traefik.docker.network=traefik"
    - "traefik.http.routers.alertmanager.rule=Host(`alerts.silverlabs.uk`)"
    - "traefik.http.routers.alertmanager.entrypoints=websecure"
    - "traefik.http.routers.alertmanager.tls=true"
    - "traefik.http.routers.alertmanager.tls.certresolver=letsencrypt"
    # Require HTTP basic auth on the public route.
    - "traefik.http.routers.alertmanager.middlewares=alertmanager-auth"
    # ALERTMANAGER_BASIC_AUTH_USERS: comma-separated htpasswd entries
    # (user:hash). Compose aborts with the message below when it is unset.
    # Single-quote the value in a .env file so `$` in the hash stays literal.
    - "traefik.http.middlewares.alertmanager-auth.basicauth.users=${ALERTMANAGER_BASIC_AUTH_USERS:?ALERTMANAGER_BASIC_AUTH_USERS must be set to htpasswd entries}"
    - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"
# Log aggregation with Fluentd.
# Mounts the external littleshop_logs volume read-only at /fluentd/log and
# dedicated named volumes at /fluentd/log/output and /fluentd/log/buffer.
# Attached to the internal `monitoring` network only (no Traefik route).
# NOTE(review): the output and buffer volumes are mounted beneath the
# read-only /fluentd/log mount; confirm those mount points can be created
# on a fresh deployment and that fluentd.conf expects these paths.
fluentd:
  container_name: littleshop_fluentd
  image: fluent/fluentd:v1.16-1
  restart: unless-stopped
  environment:
    # Config file name, resolved relative to /fluentd/etc by the image.
    FLUENTD_CONF: fluent.conf
  networks:
    - monitoring
  volumes:
    - ./docker/fluentd.conf:/fluentd/etc/fluent.conf:ro
    - littleshop_logs:/fluentd/log:ro
    - fluentd_logs:/fluentd/log/output
    - fluentd_buffer:/fluentd/log/buffer
# Node Exporter for system metrics.
# The host's /proc, /sys and / are mounted read-only and the exporter is
# pointed at each of them, so it reports host metrics rather than the
# container's own view. Attached to the internal `monitoring` network only.
node_exporter:
  image: prom/node-exporter:v1.8.0
  container_name: littleshop_node_exporter
  restart: unless-stopped
  command:
    - '--path.procfs=/host/proc'
    - '--path.sysfs=/host/sys'
    # Without this flag the /rootfs mount below is unused and filesystem
    # stats are taken from paths inside the container instead of the host.
    - '--path.rootfs=/rootfs'
    # `$$` is Compose's escape for a literal `$` in the regex.
    - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
  volumes:
    - /proc:/host/proc:ro
    - /sys:/host/sys:ro
    - /:/rootfs:ro
  networks:
    - monitoring
# cAdvisor for container metrics.
# Runs privileged with host paths mounted so it can inspect containers on the
# host. All host mounts are read-only; /var/run was previously read-write,
# which is not needed to talk to the Docker socket.
cadvisor:
  image: gcr.io/cadvisor/cadvisor:v0.47.0
  container_name: littleshop_cadvisor
  restart: unless-stopped
  privileged: true
  devices:
    - /dev/kmsg:/dev/kmsg
  volumes:
    - /:/rootfs:ro
    - /var/run:/var/run:ro
    - /sys:/sys:ro
    - /var/lib/docker:/var/lib/docker:ro
    # NOTE(review): /cgroup is a legacy cgroup location; on hosts without it
    # Docker will create an empty directory - confirm it is still required.
    - /cgroup:/cgroup:ro
  networks:
    - monitoring
  command:
    # Collect stats every 30s and only for Docker containers.
    - '--housekeeping_interval=30s'
    - '--docker_only=true'
# Named volumes used by the services above.
volumes:
  # Created outside this file (external); referenced by its full name and
  # mounted read-only by fluentd.
  littleshop_logs:
    name: littleshop_littleshop_logs
    external: true

  # Persistent state owned by this stack, stored with the local driver.
  prometheus_data:
    driver: local
  grafana_data:
    driver: local
  alertmanager_data:
    driver: local
  fluentd_logs:
    driver: local
  fluentd_buffer:
    driver: local
# Networks used by the services above.
networks:
  # Pre-existing network (external); joined by the services that carry
  # Traefik routing labels.
  traefik:
    external: true

  # Bridge network created by this stack and joined by every service.
  monitoring:
    driver: bridge