littleshop/docker/alert_rules.yml
SysAdmin 68c5d2dfdf Production optimization: Docker configuration and monitoring stack
🚀 Docker Production Optimizations:
- Chiseled Ubuntu base image for minimal attack surface
- Non-root user execution with security hardening
- Read-only filesystem with targeted writable volumes
- Resource limits (1GB RAM, 1 CPU) with health checks
- Multi-stage builds optimized for caching
- Zero-downtime deployment automation

🔍 Comprehensive Monitoring Stack:
- Prometheus metrics collection with custom rules
- Grafana dashboards for application visualization
- AlertManager with email notifications for critical events
- Fluentd centralized logging with retention policies
- Node Exporter + cAdvisor for system/container metrics
- Health check endpoint (/health) for container orchestration

📋 Production Deployment Ready:
- Complete deployment scripts with backup strategy
- Environment templates for secure configuration
- Performance monitoring and alerting rules
- Enterprise-grade security and observability

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-09-19 12:35:41 +01:00

80 lines
2.8 KiB
YAML

groups:
# Alerts about the LittleShop application itself: scrape availability,
# container CPU/memory pressure, root filesystem space and HTTP 5xx ratio.
- name: littleshop_alerts
  rules:
  # Application health alerts
  # Fires when the "littleshop" scrape target has reported up == 0 for 1m.
  - alert: LittleShopDown
    expr: up{job="littleshop"} == 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "LittleShop application is down"
      description: "LittleShop application has been down for more than 1 minute."
  # Same check against the separate "littleshop-health" scrape job.
  - alert: LittleShopHealthCheckFailing
    expr: up{job="littleshop-health"} == 0
    for: 2m
    labels:
      severity: warning
    annotations:
      summary: "LittleShop health check is failing"
      description: "LittleShop health check has been failing for more than 2 minutes."
  # Performance alerts
  # rate() of CPU seconds is "cores in use"; * 100 makes 100 equal one full
  # core. Summed per container so that per-core series (exported by some
  # cAdvisor versions) are combined instead of being compared one by one.
  - alert: HighCpuUsage
    expr: >-
      sum by (name) (
      rate(container_cpu_usage_seconds_total{name="littleshop_prod"}[5m])
      ) * 100 > 80
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High CPU usage detected"
      description: "LittleShop container CPU usage is above 80% for more than 5 minutes."
  # Working-set bytes are used rather than usage bytes because the latter
  # also counts reclaimable page cache. The "> 0" filter drops the series
  # when no memory limit is configured, which would otherwise divide by zero
  # and yield +Inf (a permanently firing alert).
  - alert: HighMemoryUsage
    expr: >-
      (
      container_memory_working_set_bytes{name="littleshop_prod"}
      / (container_spec_memory_limit_bytes{name="littleshop_prod"} > 0)
      ) * 100 > 85
    for: 5m
    labels:
      severity: warning
    annotations:
      summary: "High memory usage detected"
      description: "LittleShop container memory usage is above 85% for more than 5 minutes."
  # System alerts
  # Percentage of free space on the filesystem mounted at "/".
  - alert: DiskSpaceLow
    expr: >-
      (
      node_filesystem_avail_bytes{mountpoint="/"}
      / node_filesystem_size_bytes{mountpoint="/"}
      ) * 100 < 10
    for: 5m
    labels:
      severity: critical
    annotations:
      summary: "Disk space is running low"
      description: "Available disk space is less than 10%."
  # Both sides are aggregated with sum() before dividing. Without it the
  # division matches series one-to-one on identical labels, so every 5xx
  # series is divided by itself and the ratio is 100% as soon as a single
  # 5xx response is served. With no traffic the result is NaN and the
  # comparison does not fire.
  - alert: HighErrorRate
    expr: >-
      sum(rate(littleshop_http_requests_total{status=~"5.."}[5m]))
      / sum(rate(littleshop_http_requests_total[5m]))
      * 100 > 5
    for: 3m
    labels:
      severity: warning
    annotations:
      summary: "High error rate detected"
      description: "HTTP 5xx error rate is above 5% for more than 3 minutes."
# Alerts about what the application runs on and depends on: container
# restarts and database connectivity.
- name: infrastructure_alerts
  rules:
  # Container monitoring
  # container_last_seen is a timestamp gauge that keeps advancing while the
  # container exists, so increase() over it is positive all the time and the
  # alert would never resolve. A restart is detected instead as a reset of
  # the container's cumulative CPU counter within the last hour.
  # NOTE(review): assumes an in-place restart keeps the same series labels
  # (same container id), so the counter visibly drops - confirm on this host.
  - alert: ContainerRestarted
    expr: >-
      sum by (name) (
      resets(container_cpu_usage_seconds_total{name="littleshop_prod"}[1h])
      ) > 0
    labels:
      severity: warning
    annotations:
      summary: "Container has been restarted"
      description: "LittleShop container has been restarted within the last hour."
  # Database alerts
  # Looks at new failures in the last 5 minutes rather than the raw value:
  # a cumulative counter stays above 0 after the first failure, which would
  # keep the alert firing until the process restarts.
  # NOTE(review): presumes the metric is a monotonically increasing counter,
  # as its _total suffix suggests - verify against the exporter.
  - alert: DatabaseConnectionFailed
    expr: increase(littleshop_database_connections_failed_total[5m]) > 0
    for: 1m
    labels:
      severity: critical
    annotations:
      summary: "Database connection failures detected"
      description: "LittleShop is experiencing database connection failures."