freealberta/test-monitoring.sh

164 lines
4.7 KiB
Bash
Executable File

#!/bin/bash
#
# Changemaker Lite Monitoring Test Script
#
# Tests that all monitoring components are working correctly.

# Stop at the first command that fails outside a conditional context.
set -o errexit

# Colour escapes for status lines. They are stored as literal backslash
# sequences and only turned into control characters by `echo -e` at print time.
NC='\033[0m'          # reset / no colour
YELLOW='\033[1;33m'
RED='\033[0;31m'
GREEN='\033[0;32m'

# Opening banner followed by one blank line.
printf '%s\n' \
  "=========================================" \
  "Changemaker Lite Monitoring Test" \
  "=========================================" \
  ""
#######################################
# Request a URL and report whether it answers with the expected HTTP status.
# Globals:
#   GREEN, YELLOW, RED, NC (read) - colour escapes used in the status line
# Arguments:
#   $1 - service name shown in the status line
#   $2 - URL to request
#   $3 - expected HTTP status code (optional, defaults to 200)
# Outputs:
#   Writes a single "Testing <name>... <result>" line to stdout.
# Returns:
#   0 if the status code matches, 1 if it differs or curl itself fails.
#######################################
check_service() {
  local service_name=$1
  local url=$2
  local expected_status=${3:-200}
  # Declared local so the captured status code does not overwrite a
  # same-named variable in the caller's scope.
  local response

  echo -n "Testing $service_name... "

  # Bounded timeouts: an endpoint that accepts the connection but never
  # answers would otherwise block the whole test run indefinitely.
  if ! response=$(curl -s -o /dev/null -w "%{http_code}" \
      --connect-timeout 5 --max-time 10 "$url" 2>/dev/null); then
    echo -e "${RED}✗ FAILED${NC} (No response)"
    return 1
  fi

  if [ "$response" = "$expected_status" ]; then
    echo -e "${GREEN}✓ OK${NC} (HTTP $response)"
    return 0
  fi

  echo -e "${YELLOW}⚠ WARNING${NC} (HTTP $response, expected $expected_status)"
  return 1
}
# Step 1: look for "prometheus" in the `docker compose ps` listing. If it is
# absent, print how to start the monitoring profile and stop with status 1.
echo "1. Checking Docker containers..."
echo ""
if docker compose ps | grep -q "prometheus"; then
  echo -e "${GREEN}✓ Monitoring containers are running${NC}"
  echo ""
else
  echo -e "${RED}✗ Monitoring services not running!${NC}"
  printf '%s\n' \
    "" \
    "Start monitoring with:" \
    " docker compose --profile monitoring up -d" \
    ""
  exit 1
fi
# Step 2: probe the health/metrics endpoint of every monitoring service.
echo "2. Testing service endpoints..."
echo ""

# Running count of failed checks; the later steps add to it and the summary
# at the end of the script decides the exit status from it.
failures=0

# "Display name|URL" pairs, probed in this order.
endpoints=(
  "Prometheus|http://localhost:9090/-/healthy"
  "Grafana|http://localhost:3001/api/health"
  "Alertmanager|http://localhost:9093/-/healthy"
  "Gotify|http://localhost:8889/health"
  "cAdvisor|http://localhost:8080/healthz"
  "Node Exporter|http://localhost:9100/metrics"
  "Redis Exporter|http://localhost:9121/metrics"
)

for endpoint in "${endpoints[@]}"; do
  # Plain assignment rather than ((failures++)): the post-increment evaluates
  # to the old value, so with failures at 0 it returns status 1 and, as the
  # last command of the || list, would make `set -e` abort the script on the
  # very first failed probe.
  check_service "${endpoint%%|*}" "${endpoint#*|}" || failures=$((failures + 1))
done
echo ""
# Step 3: ask Prometheus for its scrape targets and require at least one that
# reports health "up".
echo "3. Checking Prometheus targets..."
echo ""
# --fail makes an HTTP error status land in the "cannot fetch" branch instead
# of having its error body parsed as a target list; the timeouts keep a
# stalled API from hanging the script.
if targets=$(curl -sf --connect-timeout 5 --max-time 10 \
    "http://localhost:9090/api/v1/targets" 2>/dev/null); then
  # grep -o prints one line per match, so wc -l gives the number of
  # occurrences even when the whole response sits on a single line.
  active_targets=$(grep -o '"health":"up"' <<<"$targets" | wc -l)
  total_targets=$(grep -o '"health":"' <<<"$targets" | wc -l)
  echo " Active targets: $active_targets / $total_targets"
  if [ "$active_targets" -gt 0 ]; then
    echo -e "${GREEN}✓ Prometheus is scraping targets${NC}"
  else
    echo -e "${RED}✗ No active Prometheus targets!${NC}"
    # Plain assignment rather than ((failures++)): with failures at 0 the
    # post-increment returns status 1 and `set -e` would abort right here.
    failures=$((failures + 1))
  fi
else
  echo -e "${RED}✗ Cannot fetch Prometheus targets${NC}"
  failures=$((failures + 1))
fi
echo ""
# Step 4: report how many alerts Prometheus lists as firing or pending.
# Firing alerts produce a warning only; they are not counted as a failure.
echo "4. Checking for active alerts..."
echo ""
# --fail is essential here: without it an HTTP error body contains no
# "firing" entries and would be reported as a green "no alerts" result.
# The timeouts keep a stalled API from hanging the script.
if alerts=$(curl -sf --connect-timeout 5 --max-time 10 \
    "http://localhost:9090/api/v1/alerts" 2>/dev/null); then
  # grep -o prints one line per match, so wc -l counts occurrences.
  firing_alerts=$(grep -o '"state":"firing"' <<<"$alerts" | wc -l)
  pending_alerts=$(grep -o '"state":"pending"' <<<"$alerts" | wc -l)
  echo " Firing alerts: $firing_alerts"
  echo " Pending alerts: $pending_alerts"
  if [ "$firing_alerts" -eq 0 ]; then
    echo -e "${GREEN}✓ No alerts currently firing${NC}"
  else
    echo -e "${YELLOW}⚠ There are $firing_alerts firing alerts${NC}"
    echo " Check Prometheus: http://localhost:9090/alerts"
  fi
else
  echo -e "${RED}✗ Cannot fetch alert status${NC}"
  # Plain assignment rather than ((failures++)): with failures at 0 the
  # post-increment returns status 1 and `set -e` would abort right here.
  failures=$((failures + 1))
fi
echo ""
# Step 5: fetch Alertmanager's status document and check that it mentions
# "gotify". A missing mention is a warning only; an unreachable status
# endpoint counts as a failure.
echo "5. Verifying Alertmanager configuration..."
echo ""
# Try the v2 status endpoint first and fall back to v1: recent Alertmanager
# releases no longer serve the v1 API, while older ones may only offer v1.
# --fail keeps an HTTP error body from being searched as if it were the
# configuration; the timeouts keep a stalled API from hanging the script.
if config=$(curl -sf --connect-timeout 5 --max-time 10 \
      "http://localhost:9093/api/v2/status" 2>/dev/null) \
    || config=$(curl -sf --connect-timeout 5 --max-time 10 \
      "http://localhost:9093/api/v1/status" 2>/dev/null); then
  if grep -q "gotify" <<<"$config"; then
    echo -e "${GREEN}✓ Alertmanager configured with Gotify${NC}"
  else
    echo -e "${YELLOW}⚠ Gotify not found in Alertmanager config${NC}"
    echo " Check configs/alertmanager/alertmanager.yml"
  fi
else
  echo -e "${RED}✗ Cannot fetch Alertmanager config${NC}"
  # Plain assignment rather than ((failures++)): with failures at 0 the
  # post-increment returns status 1 and `set -e` would abort right here.
  failures=$((failures + 1))
fi
echo ""
# Final report: print the summary banner, then exit 1 with troubleshooting
# hints if any check was counted as failed, or exit 0 with next steps.
printf '%s\n' \
  "=========================================" \
  "Test Summary" \
  "=========================================" \
  ""

# Failure path first, so the success path reads straight through below.
if ! [ "$failures" -eq 0 ]; then
  echo -e "${RED}$failures test(s) failed${NC}"
  printf '%s\n' \
    "" \
    "Troubleshooting:" \
    " 1. Check logs: docker compose logs <service-name>" \
    " 2. Verify services are running: docker compose ps" \
    " 3. Review MONITORING.md for setup instructions" \
    ""
  exit 1
fi

echo -e "${GREEN}✓ All tests passed!${NC}"
printf '%s\n' \
  "" \
  "Monitoring stack is healthy. Next steps:" \
  " 1. Access Grafana: http://localhost:3001" \
  " 2. View Prometheus: http://localhost:9090" \
  " 3. Configure Gotify app token (see MONITORING.md)" \
  " 4. Set up mobile push notifications" \
  ""
exit 0