🚀 Docker Production Optimizations: - Chiseled Ubuntu base image for minimal attack surface - Non-root user execution with security hardening - Read-only filesystem with targeted writable volumes - Resource limits (1GB RAM, 1 CPU) with health checks - Multi-stage builds optimized for caching - Zero-downtime deployment automation 🔍 Comprehensive Monitoring Stack: - Prometheus metrics collection with custom rules - Grafana dashboards for application visualization - AlertManager with email notifications for critical events - Fluentd centralized logging with retention policies - Node Exporter + cAdvisor for system/container metrics - Health check endpoint (/health) for container orchestration 📋 Production Deployment Ready: - Complete deployment scripts with backup strategy - Environment templates for secure configuration - Performance monitoring and alerting rules - Enterprise-grade security and observability 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
160 lines
5.0 KiB
YAML
160 lines
5.0 KiB
YAML
# LittleShop monitoring and observability stack.
# Bundles Prometheus, Grafana, AlertManager and log aggregation.
version: "3.8"
|
|
|
|
services:
  # Prometheus for metrics collection.
  # Reachable on the `monitoring` network and published through Traefik at
  # prometheus.silverlabs.uk (TLS via the `letsencrypt` cert resolver).
  prometheus:
    image: prom/prometheus:v2.53.0
    container_name: littleshop_prometheus
    restart: unless-stopped
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # Keep 15 days of samples in the TSDB.
      - '--storage.tsdb.retention.time=15d'
      # NOTE(review): the lifecycle and admin HTTP APIs are enabled while the
      # router below carries no auth middleware label in this file. Confirm
      # that access is restricted elsewhere, or drop these two flags.
      - '--web.enable-lifecycle'
      - '--web.enable-admin-api'
    volumes:
      # Scrape configuration is read-only; TSDB data lives in a named volume.
      - ./docker/prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    networks:
      - monitoring
      - traefik
    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=traefik"
      - "traefik.http.routers.prometheus.rule=Host(`prometheus.silverlabs.uk`)"
      - "traefik.http.routers.prometheus.entrypoints=websecure"
      - "traefik.http.routers.prometheus.tls=true"
      - "traefik.http.routers.prometheus.tls.certresolver=letsencrypt"
      - "traefik.http.services.prometheus.loadbalancer.server.port=9090"

  # Grafana for visualization.
  # Published through Traefik at grafana.silverlabs.uk.
  grafana:
    image: grafana/grafana:11.0.0
    container_name: littleshop_grafana
    restart: unless-stopped
    environment:
      # No fallback password: this instance is routed publicly, so Compose
      # aborts with the message below when GRAFANA_ADMIN_PASSWORD is unset or
      # empty instead of silently starting with admin/admin.
      - 'GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:?GRAFANA_ADMIN_PASSWORD must be set}'
      - GF_SECURITY_ADMIN_USER=${GRAFANA_ADMIN_USER:-admin}
      - GF_USERS_ALLOW_SIGN_UP=false
      - GF_SECURITY_DISABLE_GRAVATAR=true
      - GF_ANALYTICS_REPORTING_ENABLED=false
      - GF_ANALYTICS_CHECK_FOR_UPDATES=false
      - GF_INSTALL_PLUGINS=grafana-clock-panel,grafana-simple-json-datasource
    volumes:
      # Grafana state is persisted; provisioning and dashboards are mounted
      # read-only from the repository.
      - grafana_data:/var/lib/grafana
      - ./docker/grafana/provisioning:/etc/grafana/provisioning:ro
      - ./docker/grafana/dashboards:/var/lib/grafana/dashboards:ro
    networks:
      - monitoring
      - traefik
    depends_on:
      - prometheus
    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=traefik"
      - "traefik.http.routers.grafana.rule=Host(`grafana.silverlabs.uk`)"
      - "traefik.http.routers.grafana.entrypoints=websecure"
      - "traefik.http.routers.grafana.tls=true"
      - "traefik.http.routers.grafana.tls.certresolver=letsencrypt"
      - "traefik.http.services.grafana.loadbalancer.server.port=3000"

  # AlertManager for alerting.
  # Published through Traefik at alerts.silverlabs.uk; the external URL flag
  # below matches that host.
  alertmanager:
    image: prom/alertmanager:v0.27.0
    container_name: littleshop_alertmanager
    restart: unless-stopped
    command:
      - '--config.file=/etc/alertmanager/alertmanager.yml'
      - '--storage.path=/alertmanager'
      - '--web.external-url=https://alerts.silverlabs.uk'
    volumes:
      - ./docker/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro
      - alertmanager_data:/alertmanager
    networks:
      - monitoring
      - traefik
    labels:
      - "traefik.enable=true"
      - "traefik.docker.network=traefik"
      - "traefik.http.routers.alertmanager.rule=Host(`alerts.silverlabs.uk`)"
      - "traefik.http.routers.alertmanager.entrypoints=websecure"
      - "traefik.http.routers.alertmanager.tls=true"
      - "traefik.http.routers.alertmanager.tls.certresolver=letsencrypt"
      - "traefik.http.services.alertmanager.loadbalancer.server.port=9093"

  # Log aggregation with Fluentd.
  # Only attached to the `monitoring` network; not routed through Traefik.
  fluentd:
    image: fluent/fluentd:v1.16-1
    container_name: littleshop_fluentd
    restart: unless-stopped
    volumes:
      - ./docker/fluentd.conf:/fluentd/etc/fluent.conf:ro
      # Application logs come from the external littleshop_logs volume,
      # mounted read-only.
      - littleshop_logs:/fluentd/log:ro
      # NOTE(review): the next two mounts are nested inside the read-only
      # mount above. Container start may fail unless `output/` and `buffer/`
      # already exist in littleshop_logs - verify on a fresh volume.
      - fluentd_logs:/fluentd/log/output
      - fluentd_buffer:/fluentd/log/buffer
    networks:
      - monitoring
    environment:
      - FLUENTD_CONF=fluent.conf

  # Node Exporter for system metrics.
  # Host /proc, /sys and / are bind-mounted read-only and the exporter is
  # pointed at each of them explicitly.
  node_exporter:
    image: prom/node-exporter:v1.8.0
    container_name: littleshop_node_exporter
    restart: unless-stopped
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # Required so the filesystem collector stats the host root mounted at
      # /rootfs rather than the container's own filesystem.
      - '--path.rootfs=/rootfs'
      # `$$` is the Compose escape for a literal `$` in the regex.
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    networks:
      - monitoring

  # cAdvisor for container metrics.
  # Runs privileged with host paths mounted so it can inspect containers.
  cadvisor:
    image: gcr.io/cadvisor/cadvisor:v0.47.0
    container_name: littleshop_cadvisor
    restart: unless-stopped
    privileged: true
    devices:
      - /dev/kmsg:/dev/kmsg
    volumes:
      - /:/rootfs:ro
      # NOTE(review): /var/run is the only host path mounted read-write here;
      # confirm whether read-only would be sufficient.
      - /var/run:/var/run:rw
      - /sys:/sys:ro
      - /var/lib/docker:/var/lib/docker:ro
      - /cgroup:/cgroup:ro
    networks:
      - monitoring
    command:
      - '--housekeeping_interval=30s'
      - '--docker_only=true'
|
|
|
|
# Named volumes used by the services in this file.
volumes:
  # Application logs: created outside this file and looked up by its full
  # name instead of being created here.
  littleshop_logs:
    name: littleshop_littleshop_logs
    external: true

  # Persistent state for the metrics, dashboard and alerting services.
  prometheus_data:
    driver: local
  grafana_data:
    driver: local
  alertmanager_data:
    driver: local

  # Fluentd output and buffer storage.
  fluentd_logs:
    driver: local
  fluentd_buffer:
    driver: local
|
|
|
|
# Networks used by the services in this file.
networks:
  # Pre-existing network created outside this file; services that carry
  # Traefik router labels join it.
  traefik:
    external: true

  # Bridge network created by this file and joined by every service here.
  monitoring:
    driver: bridge