Complete implementation of multi-server Oracle database support:

Backend:
- Multi-pool Oracle with lazy loading per server
- Email-to-server cache for automatic server discovery
- JWT tokens include server_id claim
- /auth/check-identity and /auth/check-email endpoints
- /auth/my-servers endpoint for listing user's accessible servers
- Server switch with password re-authentication

Frontend:
- New ServerSelector component for header dropdown
- Multi-step login flow (identity → server → password)
- Server switching from header with password modal
- Mobile drawer menu with server selection
- Dark mode support for all new components
- URL bookmark support with ?server= query param

Scripts:
- Unified start.sh replacing start-prod.sh/start-test.sh
- Unified ssh-tunnel.sh with multi-server support
- Updated status.sh for new architecture

Tests:
- E2E tests for multi-server and single-server login flows
- Backend unit tests for all new endpoints
- Oracle multi-pool integration tests

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
479 lines
16 KiB
Bash
479 lines
16 KiB
Bash
#!/bin/bash
# ROA2WEB Comprehensive Health Check Script
# Monitors all services and provides detailed health information

# NOTE: errexit (`set -e`) is intentionally NOT enabled. The check functions
# in this script return non-zero to report an unhealthy component; with
# errexit active the first failed check would terminate the script before
# the remaining checks and the summary could run.

# Configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # directory holding this script
PROJECT_DIR="$(dirname "$SCRIPT_DIR")"                     # parent of SCRIPT_DIR
LOG_FILE="$PROJECT_DIR/health-check.log"

# Colors for output (ANSI escape sequences, expanded later by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Health check results
OVERALL_HEALTH=true # set to "false" by add_issue when an ERROR is recorded
ISSUES=()           # collected "[SEVERITY] component: message" strings
|
||
|
||
# Logging function
#
# Prints one timestamped line to stdout and appends the same line to
# $LOG_FILE.
# Globals:   LOG_FILE (read)
# Arguments: $1   - level label placed in the second bracket pair
#            $2.. - message words; joined with single spaces
# Outputs:   "[YYYY-MM-DD HH:MM:SS] [level] message" on stdout and in the log
# Returns:   exit status of `tee`
log() {
  local level=$1
  shift
  local message="$*"

  # Declared separately so a failing `date` is not masked by `local`.
  local timestamp
  timestamp=$(date '+%Y-%m-%d %H:%M:%S')

  # %b expands backslash escapes the same way `echo -e` does, but cannot
  # mistake the text for an echo option.
  printf '%b\n' "[$timestamp] [$level] $message" | tee -a "$LOG_FILE"
}
|
||
|
||
# Status icons
#
# Maps a status keyword to the emoji shown in front of a report line.
# Arguments: $1 - one of: healthy, warning, error, info
# Outputs:   the matching emoji on stdout; a question mark emoji for any
#            other value
status_icon() {
  local status=$1

  case "$status" in
    healthy) echo "✅" ;;
    warning) echo "⚠️" ;;
    error) echo "❌" ;;
    info) echo "ℹ️" ;;
    *) echo "❓" ;;
  esac
}
|
||
|
||
# Print section header
#
# Emits a blank line followed by the title framed by two rule lines, all in
# blue.
# Globals:   BLUE, NC (read)
# Arguments: $1 - section title
# Outputs:   four lines on stdout
section_header() {
  local title=$1
  local rule="================================="

  echo ""
  echo -e "${BLUE}${rule}${NC}"
  echo -e "${BLUE}$title${NC}"
  echo -e "${BLUE}${rule}${NC}"
}
|
||
|
||
# Add issue to report
#
# Records one finding and, for severity ERROR, marks the overall run as
# unhealthy.
# Globals:   ISSUES (appended), OVERALL_HEALTH (set to "false" on ERROR)
# Arguments: $1 - severity (only the exact value "ERROR" affects
#                 OVERALL_HEALTH)
#            $2 - component name
#            $3 - human-readable message
add_issue() {
  local severity=$1
  local component=$2
  local message=$3

  ISSUES+=("[$severity] $component: $message")

  if [[ "$severity" == "ERROR" ]]; then
    OVERALL_HEALTH=false
  fi
}
|
||
|
||
# Check if service is running
#
# Reports whether a container with exactly the given name appears in
# `docker ps`.
# Globals:   GREEN, RED, NC (read)
# Arguments: $1 - service label used in messages
#            $2 - container name to look for
# Outputs:   one status line on stdout
# Returns:   0 if the container is listed; 1 otherwise (an ERROR issue is
#            recorded through add_issue)
check_service_running() {
  local service_name=$1
  local container_name=$2

  # Plain name list + fixed-string whole-line match: the container name is
  # compared literally, so characters such as "." are not treated as regex
  # metacharacters and the table header cannot match.
  if docker ps --format '{{.Names}}' | grep -Fxq -- "$container_name"; then
    echo -e "$(status_icon "healthy") ${GREEN}$service_name is running${NC}"
    return 0
  fi

  echo -e "$(status_icon "error") ${RED}$service_name is not running${NC}"
  add_issue "ERROR" "$service_name" "Container not running"
  return 1
}
|
||
|
||
# HTTP health check
#
# Requests a URL with curl and compares the HTTP status code with the
# expected one.
# Globals:   GREEN, RED, NC (read)
# Arguments: $1 - service label used in messages
#            $2 - URL to request
#            $3 - expected HTTP status (default 200)
#            $4 - curl --max-time value in seconds (default 10)
# Outputs:   one status line on stdout
# Returns:   0 on match; 1 otherwise (an ERROR issue is recorded through
#            add_issue)
http_health_check() {
  local service_name=$1
  local url=$2
  local expected_status=${3:-200}
  local timeout=${4:-10}
  local status_code

  # Only the status code is needed, so the body is discarded instead of
  # being captured and sliced. Any curl failure is reported as "000".
  status_code=$(curl -s -o /dev/null -w '%{http_code}' \
    --max-time "$timeout" "$url" 2> /dev/null) || status_code="000"
  if [[ -z "$status_code" ]]; then
    status_code="000"
  fi

  if [[ "$status_code" == "$expected_status" ]]; then
    echo -e "$(status_icon "healthy") ${GREEN}$service_name HTTP health check passed ($status_code)${NC}"
    return 0
  fi

  echo -e "$(status_icon "error") ${RED}$service_name HTTP health check failed ($status_code)${NC}"
  add_issue "ERROR" "$service_name" "HTTP health check failed with status $status_code"
  return 1
}
|
||
|
||
# Docker container health check
#
# Reports the health status Docker records for a container.
# Globals:   GREEN, RED, YELLOW, CYAN, NC (read)
# Arguments: $1 - service label used in messages
#            $2 - container name passed to `docker inspect`
# Outputs:   one status line on stdout
# Returns:   0 for "healthy" or when no health check information is
#            available; 1 for "unhealthy", "starting" or any other value
#            (an issue is recorded through add_issue in those cases)
docker_health_check() {
  local service_name=$1
  local container_name=$2
  local health_status

  # The template yields "no-healthcheck" itself when the container has no
  # Health section, instead of relying on a template evaluation error.
  # A failing `docker inspect` is still mapped to "no-healthcheck".
  health_status=$(docker inspect \
    --format='{{if .State.Health}}{{.State.Health.Status}}{{else}}no-healthcheck{{end}}' \
    "$container_name" 2> /dev/null) || health_status="no-healthcheck"

  case "$health_status" in
    healthy)
      echo -e "$(status_icon "healthy") ${GREEN}$service_name Docker health check: healthy${NC}"
      return 0
      ;;
    unhealthy)
      echo -e "$(status_icon "error") ${RED}$service_name Docker health check: unhealthy${NC}"
      add_issue "ERROR" "$service_name" "Docker health check reports unhealthy"
      return 1
      ;;
    starting)
      echo -e "$(status_icon "warning") ${YELLOW}$service_name Docker health check: starting${NC}"
      add_issue "WARNING" "$service_name" "Docker health check still starting"
      return 1
      ;;
    no-healthcheck)
      echo -e "$(status_icon "info") ${CYAN}$service_name: No Docker health check configured${NC}"
      return 0
      ;;
    *)
      echo -e "$(status_icon "error") ${RED}$service_name Docker health check: unknown status ($health_status)${NC}"
      add_issue "ERROR" "$service_name" "Unknown Docker health check status: $health_status"
      return 1
      ;;
  esac
}
|
||
|
||
# Check container resources
#
# Prints CPU and memory usage of a running container and records a WARNING
# when either percentage is above 80.
# Globals:   CYAN, NC (read)
# Arguments: $1 - service label used in messages
#            $2 - container name
# Outputs:   one info line on stdout when stats are available
# Returns:   1 if the container is not listed by `docker ps`; 0 otherwise
check_container_resources() {
  local service_name=$1
  local container_name=$2

  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$container_name"; then
    return 1
  fi

  # Fields are separated by "|" because the MemUsage value itself contains
  # spaces ("used / limit"); whitespace splitting would put "/" where the
  # memory percentage is expected.
  local stats
  stats=$(docker stats "$container_name" --no-stream \
    --format '{{.CPUPerc}}|{{.MemUsage}}|{{.MemPerc}}' 2> /dev/null | tail -n1)

  if [[ -z "$stats" ]]; then
    return 0
  fi

  local cpu_percent mem_usage mem_percent
  IFS='|' read -r cpu_percent mem_usage mem_percent <<< "$stats"
  cpu_percent=${cpu_percent%\%}
  mem_percent=${mem_percent%\%}

  echo -e "$(status_icon "info") ${CYAN}$service_name Resources: CPU ${cpu_percent}%, Memory ${mem_usage} (${mem_percent}%)${NC}"

  # Check for resource warnings. awk performs the floating-point comparison
  # so no external calculator is needed; values that are not plain numbers
  # are skipped.
  local numeric='^[0-9]+([.][0-9]+)?$'

  if [[ "$cpu_percent" =~ $numeric ]] \
    && awk -v v="$cpu_percent" 'BEGIN { exit !(v > 80) }'; then
    add_issue "WARNING" "$service_name" "High CPU usage: ${cpu_percent}%"
  fi

  if [[ "$mem_percent" =~ $numeric ]] \
    && awk -v v="$mem_percent" 'BEGIN { exit !(v > 80) }'; then
    add_issue "WARNING" "$service_name" "High memory usage: ${mem_percent}%"
  fi

  return 0
}
|
||
|
||
# Check logs for errors
#
# Scans the last five minutes of a running container's logs for lines that
# contain "error", "exception", "failed" or "fatal" (case-insensitive).
# Globals:   GREEN, YELLOW, NC (read)
# Arguments: $1 - service label used in messages
#            $2 - container name
# Outputs:   a status line; when matches exist, also up to the last three
#            matching lines
# Returns:   1 if the container is not listed by `docker ps`; 0 otherwise
check_container_logs() {
  local service_name=$1
  local container_name=$2

  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$container_name"; then
    return 1
  fi

  # The logs are fetched once so the reported count and the lines shown
  # always come from the same snapshot.
  local recent_logs matching_lines
  local error_count=0
  recent_logs=$(docker logs "$container_name" --since="5m" 2>&1) || true
  # grep exits 1 when nothing matches; that is the normal "no errors" case.
  matching_lines=$(grep -iE 'error|exception|failed|fatal' <<< "$recent_logs") || true

  if [[ -n "$matching_lines" ]]; then
    error_count=$(grep -c '' <<< "$matching_lines")
  fi

  if [[ "$error_count" -gt 0 ]]; then
    echo -e "$(status_icon "warning") ${YELLOW}$service_name: $error_count errors in last 5 minutes${NC}"
    add_issue "WARNING" "$service_name" "$error_count errors found in recent logs"

    # Show recent errors
    echo -e "${YELLOW}Recent errors:${NC}"
    tail -n 3 <<< "$matching_lines" | sed 's/^/ /'
  else
    echo -e "$(status_icon "healthy") ${GREEN}$service_name: No recent errors in logs${NC}"
  fi

  return 0
}
|
||
|
||
# Check disk space
#
# Reports usage of the root filesystem (ERROR above 90%, WARNING above 80%)
# and, when `docker system df` succeeds, prints its table.
# Globals:   CYAN, GREEN, YELLOW, RED, NC (read)
# Outputs:   section header, status lines and optionally the Docker table
check_disk_space() {
  section_header "DISK SPACE CHECK"

  # -P selects the POSIX output format, which keeps each filesystem on a
  # single line so the second line is always the data row.
  local disk_usage
  disk_usage=$(df -P / 2> /dev/null | awk 'NR==2 { sub(/%$/, "", $5); print $5 }')

  if [[ "$disk_usage" =~ ^[0-9]+$ ]]; then
    echo -e "$(status_icon "info") ${CYAN}Root filesystem usage: ${disk_usage}%${NC}"

    if [[ "$disk_usage" -gt 90 ]]; then
      echo -e "$(status_icon "error") ${RED}Critical: Disk space usage is ${disk_usage}%${NC}"
      add_issue "ERROR" "System" "Critical disk space usage: ${disk_usage}%"
    elif [[ "$disk_usage" -gt 80 ]]; then
      echo -e "$(status_icon "warning") ${YELLOW}Warning: Disk space usage is ${disk_usage}%${NC}"
      add_issue "WARNING" "System" "High disk space usage: ${disk_usage}%"
    else
      echo -e "$(status_icon "healthy") ${GREEN}Disk space usage is acceptable${NC}"
    fi
  else
    # df produced nothing usable; report it rather than comparing garbage.
    echo -e "$(status_icon "warning") ${YELLOW}Unable to determine root filesystem usage${NC}"
    add_issue "WARNING" "System" "Unable to determine root filesystem usage"
  fi

  # Check Docker space
  local docker_space
  if docker_space=$(docker system df \
    --format "table {{.Type}}\t{{.Total}}\t{{.Active}}\t{{.Size}}\t{{.Reclaimable}}" \
    2> /dev/null); then
    echo ""
    echo -e "${CYAN}Docker space usage:${NC}"
    echo "$docker_space"
  fi

  return 0
}
|
||
|
||
# Check network connectivity
#
# Verifies three things: a Docker network whose name contains "roa-network"
# exists, 8.8.8.8 answers one ping, and google.com resolves.
# Globals:   GREEN, YELLOW, RED, NC (read)
# Outputs:   section header and one status line per check
check_network() {
  section_header "NETWORK CONNECTIVITY CHECK"

  # Check if Docker network exists. Only network names are listed, so the
  # match cannot hit an ID, driver or scope column. The match stays a
  # substring match so prefixed names still count.
  if docker network ls --format '{{.Name}}' | grep -Fq -- "roa-network"; then
    echo -e "$(status_icon "healthy") ${GREEN}Docker network 'roa-network' exists${NC}"
  else
    echo -e "$(status_icon "error") ${RED}Docker network 'roa-network' not found${NC}"
    add_issue "ERROR" "Network" "Docker network 'roa-network' not found"
  fi

  # Check external connectivity
  if ping -c 1 8.8.8.8 &> /dev/null; then
    echo -e "$(status_icon "healthy") ${GREEN}External network connectivity: OK${NC}"
  else
    echo -e "$(status_icon "warning") ${YELLOW}External network connectivity: Limited${NC}"
    add_issue "WARNING" "Network" "Limited external network connectivity"
  fi

  # Check DNS resolution. nslookup is preferred; getent is used when
  # nslookup is not installed so a missing tool is not reported as a DNS
  # failure.
  local dns_ok=false
  if command -v nslookup &> /dev/null; then
    if nslookup google.com &> /dev/null; then
      dns_ok=true
    fi
  elif command -v getent &> /dev/null; then
    if getent hosts google.com &> /dev/null; then
      dns_ok=true
    fi
  fi

  if [[ "$dns_ok" == "true" ]]; then
    echo -e "$(status_icon "healthy") ${GREEN}DNS resolution: OK${NC}"
  else
    echo -e "$(status_icon "warning") ${YELLOW}DNS resolution: Issues detected${NC}"
    add_issue "WARNING" "Network" "DNS resolution issues detected"
  fi

  return 0
}
|
||
|
||
# Check database connectivity
#
# Loads Oracle settings from $PROJECT_DIR/.env (or .env.production when
# .env is absent), checks the SSH tunnel helper when ORACLE_HOST is
# "localhost", and runs a trivial query through sqlplus when it is
# installed and credentials are set.
# Globals:   PROJECT_DIR (read); ORACLE_HOST, ORACLE_PORT, ORACLE_SID,
#            ORACLE_USER, ORACLE_PASSWORD (read; exported as a side effect
#            of sourcing the env file under `set -a`)
# Outputs:   section header and status lines
check_database() {
  section_header "DATABASE CONNECTIVITY CHECK"

  # Load environment variables (first existing file wins)
  local env_file
  for env_file in "$PROJECT_DIR/.env" "$PROJECT_DIR/.env.production"; do
    if [[ -f "$env_file" ]]; then
      set -a
      # shellcheck disable=SC1090
      source "$env_file"
      set +a
      break
    fi
  done

  # Check SSH tunnel if needed
  if [[ "${ORACLE_HOST:-}" == "localhost" && -f "$PROJECT_DIR/ssh-tunnel.sh" ]]; then
    local tunnel_status tunnel_rc=0
    tunnel_status=$("$PROJECT_DIR/ssh-tunnel.sh" status 2> /dev/null) || tunnel_rc=$?
    tunnel_status=$(tr '[:upper:]' '[:lower:]' <<< "$tunnel_status")

    # "not running" contains "running", so the negative phrase must be
    # excluded explicitly; a non-zero exit is treated as not running.
    if ((tunnel_rc == 0)) \
      && [[ "$tunnel_status" == *"running"* && "$tunnel_status" != *"not running"* ]]; then
      echo -e "$(status_icon "healthy") ${GREEN}SSH tunnel is running${NC}"
    else
      echo -e "$(status_icon "warning") ${YELLOW}SSH tunnel is not running${NC}"
      add_issue "WARNING" "Database" "SSH tunnel is not running"
    fi
  fi

  # Test Oracle connection (if we can)
  if command -v sqlplus &> /dev/null \
    && [[ -n "${ORACLE_USER:-}" && -n "${ORACLE_PASSWORD:-}" ]]; then
    # Credentials are sent on stdin through a CONNECT command rather than
    # on the sqlplus command line, where they would be visible in the
    # process list. Each SQL*Plus command is on its own line.
    local connection_output
    connection_output=$(
      printf '%s\n' \
        "SET HEADING OFF FEEDBACK OFF PAGESIZE 0" \
        "CONNECT ${ORACLE_USER}/${ORACLE_PASSWORD}@${ORACLE_HOST}:${ORACLE_PORT}/${ORACLE_SID}" \
        "SELECT 'OK' FROM DUAL;" \
        "EXIT;" \
        | timeout 10 sqlplus -s -L /nolog 2> /dev/null
    ) || true

    if grep -Eq '^[[:space:]]*OK[[:space:]]*$' <<< "$connection_output"; then
      echo -e "$(status_icon "healthy") ${GREEN}Oracle database connection: OK${NC}"
    else
      echo -e "$(status_icon "error") ${RED}Oracle database connection: Failed${NC}"
      add_issue "ERROR" "Database" "Cannot connect to Oracle database"
    fi
  else
    echo -e "$(status_icon "info") ${CYAN}Oracle connection test skipped (sqlplus not available or credentials not set)${NC}"
  fi

  return 0
}
|
||
|
||
# Check services
#
# Runs the per-container checks for the Backend, Frontend, Gateway and
# Redis services: running state, Docker health, HTTP probe (not for Redis),
# resource usage and recent log errors.
# Globals:   PURPLE, NC (read)
# Outputs:   section header, a heading per service and whatever the
#            individual checks print
# Returns:   0; findings are reported through the individual checks
check_services() {
  section_header "SERVICES HEALTH CHECK"

  # One row per service: label|container|HTTP label|HTTP URL.
  # An empty URL means the service has no HTTP probe.
  local -a services=(
    "Backend|roa-backend|Backend API|http://localhost/api/health"
    "Frontend|roa-frontend|Frontend|http://localhost:3000/health"
    "Gateway|roa-gateway|Gateway|http://localhost/health"
    "Redis|roa-redis||"
  )

  local row label container http_label http_url
  local first=true

  for row in "${services[@]}"; do
    IFS='|' read -r label container http_label http_url <<< "$row"

    # Blank line between services, none before the first.
    if [[ "$first" == "true" ]]; then
      first=false
    else
      echo ""
    fi

    echo -e "${PURPLE}ROA $label Service:${NC}"

    # Each check returns non-zero to signal a problem. `|| true` keeps a
    # failing check from ending the run when errexit is enabled, so the
    # remaining checks still execute.
    check_service_running "$label" "$container" || true
    docker_health_check "$label" "$container" || true
    if [[ -n "$http_url" ]]; then
      http_health_check "$http_label" "$http_url" || true
    fi
    check_container_resources "$label" "$container" || true
    check_container_logs "$label" "$container" || true
  done

  return 0
}
|
||
|
||
# Generate summary report
#
# Prints the overall verdict, a timestamp and every recorded issue, then
# terminates the shell.
# Globals:   OVERALL_HEALTH, ISSUES, GREEN, RED, YELLOW, CYAN, NC (read)
# Outputs:   section header and summary lines on stdout
# Exits:     0 when OVERALL_HEALTH is "true", 1 otherwise (does not return)
generate_summary() {
  section_header "HEALTH CHECK SUMMARY"

  # Decide the verdict once; it drives both the message and the exit code.
  local exit_code=0
  if [[ "$OVERALL_HEALTH" == "true" ]]; then
    echo -e "$(status_icon "healthy") ${GREEN}Overall System Health: HEALTHY${NC}"
  else
    echo -e "$(status_icon "error") ${RED}Overall System Health: ISSUES DETECTED${NC}"
    exit_code=1
  fi

  echo ""
  echo -e "${CYAN}Timestamp: $(date)${NC}"
  echo ""

  if [[ ${#ISSUES[@]} -gt 0 ]]; then
    echo -e "${YELLOW}Issues found:${NC}"
    local issue
    for issue in "${ISSUES[@]}"; do
      echo " $issue"
    done
  else
    echo -e "${GREEN}No issues detected${NC}"
  fi

  # Exit with appropriate code
  exit "$exit_code"
}
|
||
|
||
# Watch mode - continuous monitoring
#
# Redraws a compact status board in an endless loop until interrupted:
# running state of the four containers, two HTTP probes, and any issues
# recorded during the current cycle.
# Globals:   OVERALL_HEALTH, ISSUES (reset every cycle), BLUE, CYAN,
#            PURPLE, YELLOW, NC (read)
# Arguments: $1 - seconds to sleep between cycles (default 30)
# Returns:   never returns on its own
watch_mode() {
  local interval=${1:-30}
  local pair label container issue

  echo -e "${BLUE}Starting continuous health monitoring...${NC}"
  echo -e "${CYAN}Press Ctrl+C to stop${NC}"
  echo ""

  while true; do
    # Clearing the screen is cosmetic; a failure must not stop the loop.
    clear || true
    echo -e "${BLUE}ROA2WEB Health Monitor - $(date)${NC}"

    # Reset status
    OVERALL_HEALTH=true
    ISSUES=()

    # Quick service check. The checks run in the current shell (only their
    # output is discarded) so issues they record stay visible below.
    echo ""
    echo -e "${PURPLE}Service Status:${NC}"
    for pair in "Backend|roa-backend" "Frontend|roa-frontend" \
      "Gateway|roa-gateway" "Redis|roa-redis"; do
      label=${pair%%|*}
      container=${pair#*|}
      if check_service_running "$label" "$container" > /dev/null 2>&1; then
        echo -e " $label: $(status_icon "healthy")"
      else
        echo -e " $label: $(status_icon "error")"
      fi
    done

    # Quick HTTP checks
    # NOTE(review): the "Frontend" probe here targets http://localhost/health,
    # while check_services probes http://localhost:3000/health for Frontend.
    # Confirm which URL is intended.
    echo ""
    echo -e "${PURPLE}API Status:${NC}"
    if http_health_check "Backend API" "http://localhost/api/health" 200 5 > /dev/null 2>&1; then
      echo -e " API: $(status_icon "healthy")"
    else
      echo -e " API: $(status_icon "error")"
    fi
    if http_health_check "Frontend" "http://localhost/health" 200 5 > /dev/null 2>&1; then
      echo -e " Frontend: $(status_icon "healthy")"
    else
      echo -e " Frontend: $(status_icon "error")"
    fi

    if [[ ${#ISSUES[@]} -gt 0 ]]; then
      echo ""
      echo -e "${YELLOW}Current Issues:${NC}"
      for issue in "${ISSUES[@]}"; do
        echo " $issue"
      done
    fi

    sleep "$interval"
  done
}
|
||
|
||
# Main function
#
# Dispatches on the requested action.
# Globals:   OVERALL_HEALTH, ISSUES (reset for full/quick), BLUE, CYAN, NC
# Arguments: $1 - full (default) | quick | services | network | database |
#                 watch; anything else prints usage and exits 1
# Returns:   full/quick end in generate_summary; services/network/database
#            return 1 when OVERALL_HEALTH is not "true" afterwards, else 0
main() {
  local action=${1:-full}

  # The check_* functions are always invoked as `check_x || true`: when a
  # command is the left side of `||`, errexit is ignored for it, so a
  # failing check cannot end the script before the summary is printed.
  case "$action" in
    full)
      echo -e "${BLUE}ROA2WEB Comprehensive Health Check${NC}"
      echo -e "${CYAN}$(date)${NC}"

      # Reset status
      OVERALL_HEALTH=true
      ISSUES=()

      check_services || true
      check_disk_space || true
      check_network || true
      check_database || true
      generate_summary
      ;;
    quick)
      echo -e "${BLUE}ROA2WEB Quick Health Check${NC}"

      # Reset status
      OVERALL_HEALTH=true
      ISSUES=()

      check_services || true
      generate_summary
      ;;
    services | network | database)
      "check_${action}" || true
      # Without a summary step, report health through the return status.
      if [[ "$OVERALL_HEALTH" != "true" ]]; then
        return 1
      fi
      ;;
    watch)
      watch_mode
      ;;
    *)
      echo "Usage: $0 {full|quick|services|network|database|watch}"
      echo ""
      echo "Commands:"
      echo " full - Comprehensive health check (default)"
      echo " quick - Quick services health check"
      echo " services - Check only ROA2WEB services"
      echo " network - Check network connectivity"
      echo " database - Check database connectivity"
      echo " watch - Continuous monitoring mode"
      exit 1
      ;;
  esac
}
|
||
|
||
# Make sure bc is available for numeric comparisons
if ! command -v bc &> /dev/null; then
  # Fallback function for numeric comparison without bc.
  # Arguments: $1 - left operand, $2 - comparison operator, $3 - right operand
  # Outputs:   whatever python3 prints for the expression ("True"/"False"),
  #            or "false" when python3 fails.
  # NOTE(review): the operands are interpolated into Python source; only
  # call this with trusted numeric values.
  compare_float() {
    local val1=$1
    local op=$2
    local val2=$3
    python3 -c "print($val1 $op $val2)" 2> /dev/null || echo "false"
  }

  # Stand-in for `bc` covering the one form this script relies on:
  # `echo "<number> <op> <number>" | bc -l`, printing 1 or 0.
  # A shell function is used because aliases are not expanded in
  # non-interactive scripts, and `python3 -c` does not evaluate an
  # expression arriving on stdin.
  # Outputs: 1 if the comparison holds, 0 otherwise (0 plus a diagnostic on
  #          stderr for input it cannot parse)
  bc() {
    local expr
    local cmp_re='^[[:space:]]*(-?[0-9]*[.]?[0-9]+)[[:space:]]*(<=|>=|==|!=|<|>)[[:space:]]*(-?[0-9]*[.]?[0-9]+)[[:space:]]*$'

    IFS= read -r expr || true
    if [[ ! "$expr" =~ $cmp_re ]]; then
      echo "bc fallback: unsupported expression: $expr" >&2
      echo 0
      return 1
    fi

    # Operands are validated above and passed as awk variables, never
    # spliced into the awk program text.
    awk -v a="${BASH_REMATCH[1]}" -v op="${BASH_REMATCH[2]}" -v b="${BASH_REMATCH[3]}" '
      BEGIN {
        a += 0; b += 0
        if (op == ">") r = (a > b)
        else if (op == "<") r = (a < b)
        else if (op == ">=") r = (a >= b)
        else if (op == "<=") r = (a <= b)
        else if (op == "==") r = (a == b)
        else r = (a != b)
        print (r ? 1 : 0)
      }'
  }
fi

# Run main function
main "$@"