Files
roa2web-service-auto/scripts/health-check.sh
Claude Agent b137e80b71 feat: multi-Oracle server support with runtime switching
Complete implementation of multi-server Oracle database support:

Backend:
- Multi-pool Oracle with lazy loading per server
- Email-to-server cache for automatic server discovery
- JWT tokens include server_id claim
- /auth/check-identity and /auth/check-email endpoints
- /auth/my-servers endpoint for listing user's accessible servers
- Server switch with password re-authentication

Frontend:
- New ServerSelector component for header dropdown
- Multi-step login flow (identity → server → password)
- Server switching from header with password modal
- Mobile drawer menu with server selection
- Dark mode support for all new components
- URL bookmark support with ?server= query param

Scripts:
- Unified start.sh replacing start-prod.sh/start-test.sh
- Unified ssh-tunnel.sh with multi-server support
- Updated status.sh for new architecture

Tests:
- E2E tests for multi-server and single-server login flows
- Backend unit tests for all new endpoints
- Oracle multi-pool integration tests

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-26 22:39:06 +00:00

479 lines
16 KiB
Bash
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
#
# ROA2WEB Comprehensive Health Check Script
# Monitors all services and provides detailed health information.

# Stop on any unhandled non-zero exit status.
set -e

# --- Paths ------------------------------------------------------------
# SCRIPT_DIR  : absolute directory containing this script
# PROJECT_DIR : parent directory of SCRIPT_DIR
# LOG_FILE    : file that log() appends to
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
PROJECT_DIR=$(dirname "$SCRIPT_DIR")
LOG_FILE="${PROJECT_DIR}/health-check.log"

# --- Colours ----------------------------------------------------------
# Stored as literal backslash sequences; they are turned into real escape
# codes by the `echo -e` calls that print them.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# --- Run state --------------------------------------------------------
# OVERALL_HEALTH flips to "false" when add_issue() records an ERROR;
# ISSUES collects one formatted line per recorded issue.
OVERALL_HEALTH=true
declare -a ISSUES=()
#######################################
# Print a timestamped log line and append it to the log file.
# Globals:   LOG_FILE (read)
# Arguments: $1 - level tag (e.g. INFO); remaining args - message words
# Outputs:   "[YYYY-MM-DD HH:MM:SS] [LEVEL] message" on stdout and,
#            via tee -a, at the end of LOG_FILE
#######################################
log() {
  local severity=$1
  shift
  local text="$*"
  # Timestamp is taken at print time; escapes in the text are interpreted
  # because of `echo -e`.
  echo -e "[$(date '+%Y-%m-%d %H:%M:%S')] [$severity] $text" | tee -a "$LOG_FILE"
}
#######################################
# Map a status keyword to the icon printed in front of report lines.
# Arguments: $1 - one of healthy | warning | error | info
# Outputs:   the icon followed by a newline; "info" prints an empty
#            line and any other keyword prints the question-mark icon
#######################################
status_icon() {
  local kind=$1
  local icon
  case "$kind" in
    healthy) icon="✅" ;;
    warning) icon="⚠️" ;;
    error)   icon="❌" ;;
    info)    icon="" ;;
    *)       icon="❓" ;;
  esac
  echo "$icon"
}
#######################################
# Print a blank line followed by a three-line banner for a report section.
# Globals:   BLUE, NC (read)
# Arguments: $1 - section title
# Outputs:   banner on stdout (rule, title, rule), coloured with BLUE
#######################################
section_header() {
  local heading=$1
  local rule="${BLUE}=================================${NC}"

  echo ""
  echo -e "$rule"
  echo -e "${BLUE}${heading}${NC}"
  echo -e "$rule"
}
#######################################
# Record an issue for the final report.
# Globals:   ISSUES (appended), OVERALL_HEALTH (set to "false" on ERROR)
# Arguments: $1 - severity (ERROR marks the run unhealthy)
#            $2 - component name
#            $3 - human-readable message
#######################################
add_issue() {
  local level=$1 origin=$2 detail=$3

  ISSUES+=("[${level}] ${origin}: ${detail}")

  # Only ERROR-level issues affect the overall verdict.
  [[ "$level" != "ERROR" ]] || OVERALL_HEALTH=false
}
#######################################
# Report whether a container with the given name is currently running.
# Globals:   GREEN, RED, NC (read); ISSUES / OVERALL_HEALTH via add_issue
# Arguments: $1 - display name of the service
#            $2 - exact Docker container name
# Outputs:   one status line on stdout
# Returns:   0 if the container is listed by `docker ps`, 1 otherwise
#            (an ERROR issue is recorded in that case)
#######################################
check_service_running() {
  local service_name=$1
  local container_name=$2

  # Match the name as a fixed string against whole lines (-F -x): container
  # names may contain '.', which a regex would treat as "any character".
  # Without the "table" prefix docker prints names only, no header row.
  if docker ps --format '{{.Names}}' | grep -qxF -- "$container_name"; then
    echo -e "$(status_icon "healthy") ${GREEN}$service_name is running${NC}"
    return 0
  else
    echo -e "$(status_icon "error") ${RED}$service_name is not running${NC}"
    add_issue "ERROR" "$service_name" "Container not running"
    return 1
  fi
}
#######################################
# Probe an HTTP endpoint with curl and compare the status code.
# Globals:   GREEN, RED, NC (read); ISSUES / OVERALL_HEALTH via add_issue
# Arguments: $1 - display name of the service
#            $2 - URL to request
#            $3 - expected status code (default 200)
#            $4 - curl --max-time in seconds (default 10)
# Outputs:   one status line on stdout
# Returns:   0 when the code matches, 1 otherwise (ERROR issue recorded)
#######################################
http_health_check() {
  local service_name=$1
  local url=$2
  local expected_status=${3:-200}
  local timeout=${4:-10}
  local raw code

  # curl prints the body followed by the status code; if curl itself fails
  # "000" is appended, so the last three characters are always the code.
  raw=$(curl -s -w "%{http_code}" --max-time "$timeout" "$url" 2>/dev/null || echo "000")
  code="${raw: -3}"

  if [[ "$code" != "$expected_status" ]]; then
    echo -e "$(status_icon "error") ${RED}$service_name HTTP health check failed ($code)${NC}"
    add_issue "ERROR" "$service_name" "HTTP health check failed with status $code"
    return 1
  fi

  echo -e "$(status_icon "healthy") ${GREEN}$service_name HTTP health check passed ($code)${NC}"
  return 0
}
# Docker container health check
docker_health_check() {
local service_name=$1
local container_name=$2
local health_status
health_status=$(docker inspect --format='{{.State.Health.Status}}' "$container_name" 2>/dev/null || echo "no-healthcheck")
case $health_status in
"healthy")
echo -e "$(status_icon "healthy") ${GREEN}$service_name Docker health check: healthy${NC}"
return 0
;;
"unhealthy")
echo -e "$(status_icon "error") ${RED}$service_name Docker health check: unhealthy${NC}"
add_issue "ERROR" "$service_name" "Docker health check reports unhealthy"
return 1
;;
"starting")
echo -e "$(status_icon "warning") ${YELLOW}$service_name Docker health check: starting${NC}"
add_issue "WARNING" "$service_name" "Docker health check still starting"
return 1
;;
"no-healthcheck")
echo -e "$(status_icon "info") ${CYAN}$service_name: No Docker health check configured${NC}"
return 0
;;
*)
echo -e "$(status_icon "error") ${RED}$service_name Docker health check: unknown status ($health_status)${NC}"
add_issue "ERROR" "$service_name" "Unknown Docker health check status: $health_status"
return 1
;;
esac
}
#######################################
# Print CPU / memory usage of a running container and warn above 80%.
# Globals:   CYAN, NC (read); ISSUES via add_issue
# Arguments: $1 - display name of the service
#            $2 - exact Docker container name
# Outputs:   one resource line on stdout (nothing if stats are unavailable)
# Returns:   1 if the container is not running, 0 otherwise
#######################################
check_container_resources() {
  local service_name=$1
  local container_name=$2

  # Exact, fixed-string match on the container name (names may contain '.').
  if ! docker ps --format '{{.Names}}' | grep -qxF -- "$container_name"; then
    return 1
  fi

  # Fields are joined with '|' rather than whitespace because MemUsage itself
  # contains spaces ("12.3MiB / 1.9GiB"); whitespace splitting would put "/"
  # where the memory percentage is expected.
  local stats
  stats=$(docker stats "$container_name" --no-stream \
    --format '{{.CPUPerc}}|{{.MemUsage}}|{{.MemPerc}}' 2>/dev/null | tail -n1)
  if [[ -z "$stats" ]]; then
    return 0
  fi

  local cpu_percent mem_usage mem_percent
  IFS='|' read -r cpu_percent mem_usage mem_percent <<< "$stats"
  cpu_percent=${cpu_percent%\%}
  mem_percent=${mem_percent%\%}

  echo -e "$(status_icon "info") ${CYAN}$service_name Resources: CPU ${cpu_percent}%, Memory ${mem_usage} (${mem_percent}%)${NC}"

  # Floating-point comparison is done in awk so the check does not depend on
  # bc being installed; non-numeric values evaluate to 0 and raise no warning.
  if awk -v v="$cpu_percent" 'BEGIN { exit !(v + 0 > 80) }'; then
    add_issue "WARNING" "$service_name" "High CPU usage: ${cpu_percent}%"
  fi
  if awk -v v="$mem_percent" 'BEGIN { exit !(v + 0 > 80) }'; then
    add_issue "WARNING" "$service_name" "High memory usage: ${mem_percent}%"
  fi
  return 0
}
#######################################
# Scan the last five minutes of a container's logs for error keywords.
# Globals:   GREEN, YELLOW, NC (read); ISSUES via add_issue
# Arguments: $1 - display name of the service
#            $2 - exact Docker container name
# Outputs:   a summary line and, when matches exist, the last three
#            matching log lines (each prefixed with a space)
# Returns:   1 if the container is not running, 0 otherwise
#######################################
check_container_logs() {
  local service_name=$1
  local container_name=$2

  # Exact, fixed-string match on the container name (names may contain '.').
  if ! docker ps --format '{{.Names}}' | grep -qxF -- "$container_name"; then
    return 1
  fi

  # Fetch and filter the logs exactly once so the reported count and the
  # lines shown below always come from the same snapshot. grep exits 1 when
  # nothing matches; "|| true" keeps that from tripping `set -e`.
  local matches
  matches=$(docker logs "$container_name" --since="5m" 2>&1 \
    | grep -iE "error|exception|failed|fatal" || true)

  if [[ -z "$matches" ]]; then
    echo -e "$(status_icon "healthy") ${GREEN}$service_name: No recent errors in logs${NC}"
    return 0
  fi

  local error_count
  error_count=$(grep -c '' <<< "$matches")

  echo -e "$(status_icon "warning") ${YELLOW}$service_name: $error_count errors in last 5 minutes${NC}"
  add_issue "WARNING" "$service_name" "$error_count errors found in recent logs"

  # Show recent errors
  echo -e "${YELLOW}Recent errors:${NC}"
  tail -n 3 <<< "$matches" | sed 's/^/ /'
}
#######################################
# Report root filesystem usage and Docker's own disk usage.
# Globals:   CYAN, RED, YELLOW, GREEN, NC (read);
#            ISSUES / OVERALL_HEALTH via add_issue
# Outputs:   section banner, usage line, verdict line and, when
#            `docker system df` succeeds, its table
# Thresholds: above 90% is an ERROR, above 80% a WARNING.
#######################################
check_disk_space() {
  section_header "DISK SPACE CHECK"

  # Fifth column of the second `df` line is "Use%"; drop the percent sign.
  local used_pct
  used_pct=$(df -h / | awk 'NR==2 { sub(/%/, "", $5); print $5 }')
  echo -e "$(status_icon "info") ${CYAN}Root filesystem usage: ${used_pct}%${NC}"

  if [[ "$used_pct" -gt 90 ]]; then
    echo -e "$(status_icon "error") ${RED}Critical: Disk space usage is ${used_pct}%${NC}"
    add_issue "ERROR" "System" "Critical disk space usage: ${used_pct}%"
  elif [[ "$used_pct" -gt 80 ]]; then
    echo -e "$(status_icon "warning") ${YELLOW}Warning: Disk space usage is ${used_pct}%${NC}"
    add_issue "WARNING" "System" "High disk space usage: ${used_pct}%"
  else
    echo -e "$(status_icon "healthy") ${GREEN}Disk space usage is acceptable${NC}"
  fi

  # Docker's view of its disk usage; a sentinel string marks failure so the
  # table is only printed when the command produced real output.
  local unavailable="Docker space info unavailable"
  local docker_space
  docker_space=$(docker system df --format "table {{.Type}}\t{{.Total}}\t{{.Active}}\t{{.Size}}\t{{.Reclaimable}}" 2>/dev/null || echo "$unavailable")
  if [[ "$docker_space" != "$unavailable" ]]; then
    echo ""
    echo -e "${CYAN}Docker space usage:${NC}"
    echo "$docker_space"
  fi
}
#######################################
# Check the Docker network, outbound connectivity and DNS resolution.
# Globals:   GREEN, RED, YELLOW, NC (read);
#            ISSUES / OVERALL_HEALTH via add_issue
# Outputs:   section banner plus one verdict line per probe
# Severity:  a missing 'roa-network' is an ERROR; ping and DNS failures
#            are WARNINGs.
#######################################
check_network() {
  section_header "NETWORK CONNECTIVITY CHECK"

  # Docker network: any line of `docker network ls` mentioning the name.
  if ! docker network ls | grep -q "roa-network"; then
    echo -e "$(status_icon "error") ${RED}Docker network 'roa-network' not found${NC}"
    add_issue "ERROR" "Network" "Docker network 'roa-network' not found"
  else
    echo -e "$(status_icon "healthy") ${GREEN}Docker network 'roa-network' exists${NC}"
  fi

  # Outbound connectivity: a single ICMP echo to 8.8.8.8.
  if ! ping -c 1 8.8.8.8 &> /dev/null; then
    echo -e "$(status_icon "warning") ${YELLOW}External network connectivity: Limited${NC}"
    add_issue "WARNING" "Network" "Limited external network connectivity"
  else
    echo -e "$(status_icon "healthy") ${GREEN}External network connectivity: OK${NC}"
  fi

  # DNS: resolve google.com with nslookup.
  if ! nslookup google.com &> /dev/null; then
    echo -e "$(status_icon "warning") ${YELLOW}DNS resolution: Issues detected${NC}"
    add_issue "WARNING" "Network" "DNS resolution issues detected"
  else
    echo -e "$(status_icon "healthy") ${GREEN}DNS resolution: OK${NC}"
  fi
}
#######################################
# Check the SSH tunnel (when Oracle is reached via localhost) and try a
# trivial query against the Oracle database.
# Globals:   PROJECT_DIR, GREEN, RED, YELLOW, CYAN, NC (read);
#            ORACLE_HOST, ORACLE_PORT, ORACLE_SID, ORACLE_USER,
#            ORACLE_PASSWORD (read, possibly loaded from the env file);
#            ISSUES / OVERALL_HEALTH via add_issue
# Outputs:   section banner plus one verdict line per probe
# Side effects: sources $PROJECT_DIR/.env (or .env.production) with
#            auto-export enabled, so every variable it defines is exported.
#######################################
check_database() {
  section_header "DATABASE CONNECTIVITY CHECK"

  # Load environment variables: the first existing file wins.
  local env_file
  for env_file in "$PROJECT_DIR/.env" "$PROJECT_DIR/.env.production"; do
    if [[ -f "$env_file" ]]; then
      set -a
      # shellcheck disable=SC1090
      source "$env_file"
      set +a
      break
    fi
  done

  # Check SSH tunnel if needed
  if [[ "${ORACLE_HOST:-}" == "localhost" && -f "$PROJECT_DIR/ssh-tunnel.sh" ]]; then
    local tunnel_status
    tunnel_status=$("$PROJECT_DIR/ssh-tunnel.sh" status 2>/dev/null || echo "not running")
    # "not running" also contains the word "running", so the negative form
    # has to be excluded explicitly before declaring the tunnel up.
    if [[ "$tunnel_status" == *"running"* && "$tunnel_status" != *"not running"* ]]; then
      echo -e "$(status_icon "healthy") ${GREEN}SSH tunnel is running${NC}"
    else
      echo -e "$(status_icon "warning") ${YELLOW}SSH tunnel is not running${NC}"
      add_issue "WARNING" "Database" "SSH tunnel is not running"
    fi
  fi

  # Test Oracle connection (if we can)
  if command -v sqlplus &> /dev/null && [[ -n "${ORACLE_USER:-}" && -n "${ORACLE_PASSWORD:-}" ]]; then
    # Credentials are sent on stdin via CONNECT (sqlplus started with /nolog)
    # so the password never appears in the process argument list. Each
    # statement sits on its own line so its trailing ';' ends that statement.
    local sql_script connection_test
    printf -v sql_script '%s\n' \
      "CONNECT ${ORACLE_USER}/${ORACLE_PASSWORD}@${ORACLE_HOST:-}:${ORACLE_PORT:-}/${ORACLE_SID:-}" \
      "SELECT 'OK' FROM DUAL;" \
      "EXIT;"
    connection_test=$(timeout 10 sqlplus -s /nolog <<< "$sql_script" 2>/dev/null | grep "OK" || echo "failed")
    if [[ "$connection_test" == "OK" ]]; then
      echo -e "$(status_icon "healthy") ${GREEN}Oracle database connection: OK${NC}"
    else
      echo -e "$(status_icon "error") ${RED}Oracle database connection: Failed${NC}"
      add_issue "ERROR" "Database" "Cannot connect to Oracle database"
    fi
  else
    echo -e "$(status_icon "info") ${CYAN}Oracle connection test skipped (sqlplus not available or credentials not set)${NC}"
  fi
}
#######################################
# Run every per-service check for the backend, frontend, gateway and
# Redis containers.
# Globals:   PURPLE, NC (read); ISSUES / OVERALL_HEALTH via the helpers
# Outputs:   section banner, then one group of status lines per service
# Returns:   0; individual failures are recorded through add_issue by the
#            helpers rather than propagated.
#
# Every helper call is followed by "|| true": the helpers return 1 on a
# failed check, and with `set -e` active an unguarded non-zero return would
# terminate the whole script at the first unhealthy service, before the
# remaining checks and the summary could run.
#######################################
check_services() {
  section_header "SERVICES HEALTH CHECK"

  # Backend service
  echo -e "${PURPLE}ROA Backend Service:${NC}"
  check_service_running "Backend" "roa-backend" || true
  docker_health_check "Backend" "roa-backend" || true
  http_health_check "Backend API" "http://localhost/api/health" || true
  check_container_resources "Backend" "roa-backend" || true
  check_container_logs "Backend" "roa-backend" || true
  echo ""

  # Frontend service
  echo -e "${PURPLE}ROA Frontend Service:${NC}"
  check_service_running "Frontend" "roa-frontend" || true
  docker_health_check "Frontend" "roa-frontend" || true
  http_health_check "Frontend" "http://localhost:3000/health" || true
  check_container_resources "Frontend" "roa-frontend" || true
  check_container_logs "Frontend" "roa-frontend" || true
  echo ""

  # Gateway service
  echo -e "${PURPLE}ROA Gateway Service:${NC}"
  check_service_running "Gateway" "roa-gateway" || true
  docker_health_check "Gateway" "roa-gateway" || true
  http_health_check "Gateway" "http://localhost/health" || true
  check_container_resources "Gateway" "roa-gateway" || true
  check_container_logs "Gateway" "roa-gateway" || true
  echo ""

  # Redis service (no HTTP endpoint to probe)
  echo -e "${PURPLE}ROA Redis Service:${NC}"
  check_service_running "Redis" "roa-redis" || true
  docker_health_check "Redis" "roa-redis" || true
  check_container_resources "Redis" "roa-redis" || true
  check_container_logs "Redis" "roa-redis" || true
}
#######################################
# Print the final verdict and the list of recorded issues, then exit.
# Globals:   OVERALL_HEALTH, ISSUES, GREEN, RED, YELLOW, CYAN, NC (read)
# Outputs:   section banner, verdict, timestamp, issue list
# Exits:     0 when OVERALL_HEALTH is "true", 1 otherwise. This function
#            terminates the script and never returns to its caller.
#######################################
generate_summary() {
  section_header "HEALTH CHECK SUMMARY"

  # Decide the exit status up front; it is applied after the report.
  local exit_code=0
  if [[ "$OVERALL_HEALTH" != "true" ]]; then
    exit_code=1
    echo -e "$(status_icon "error") ${RED}Overall System Health: ISSUES DETECTED${NC}"
  else
    echo -e "$(status_icon "healthy") ${GREEN}Overall System Health: HEALTHY${NC}"
  fi

  echo ""
  echo -e "${CYAN}Timestamp: $(date)${NC}"

  echo ""
  if [[ ${#ISSUES[@]} -eq 0 ]]; then
    echo -e "${GREEN}No issues detected${NC}"
  else
    echo -e "${YELLOW}Issues found:${NC}"
    printf ' %s\n' "${ISSUES[@]}"
  fi

  # Exit with appropriate code
  exit "$exit_code"
}
#######################################
# Run a check silently and print "<label>: <icon>" for its outcome.
# Arguments: $1 - label to print; remaining args - command to run
# Outputs:   one line on stdout; the command's own output is discarded
#######################################
_watch_probe() {
  local label=$1
  shift
  if "$@" > /dev/null 2>&1; then
    echo -e " ${label}: $(status_icon "healthy")"
  else
    echo -e " ${label}: $(status_icon "error")"
  fi
}

#######################################
# Watch mode - continuous monitoring.
# Clears the screen and re-runs the quick container and HTTP probes every
# 30 seconds until interrupted.
# Globals:   BLUE, CYAN, PURPLE, YELLOW, NC (read);
#            OVERALL_HEALTH, ISSUES (reset at the start of every cycle)
# Returns:   never returns on its own; the loop has no exit condition
#######################################
watch_mode() {
  echo -e "${BLUE}Starting continuous health monitoring...${NC}"
  echo -e "${CYAN}Press Ctrl+C to stop${NC}"
  echo ""

  local entry
  while true; do
    clear
    echo -e "${BLUE}ROA2WEB Health Monitor - $(date)${NC}"

    # Reset status so each cycle only shows what is wrong right now.
    OVERALL_HEALTH=true
    ISSUES=()

    # Quick service check
    echo ""
    echo -e "${PURPLE}Service Status:${NC}"
    _watch_probe "Backend" check_service_running "Backend" "roa-backend"
    _watch_probe "Frontend" check_service_running "Frontend" "roa-frontend"
    _watch_probe "Gateway" check_service_running "Gateway" "roa-gateway"
    _watch_probe "Redis" check_service_running "Redis" "roa-redis"

    # Quick HTTP checks (5 second timeout each)
    echo ""
    echo -e "${PURPLE}API Status:${NC}"
    _watch_probe "API" http_health_check "Backend API" "http://localhost/api/health" 200 5
    _watch_probe "Frontend" http_health_check "Frontend" "http://localhost/health" 200 5

    if [[ ${#ISSUES[@]} -gt 0 ]]; then
      echo ""
      echo -e "${YELLOW}Current Issues:${NC}"
      for entry in "${ISSUES[@]}"; do
        echo " $entry"
      done
    fi

    sleep 30
  done
}
#######################################
# Dispatch on the requested action.
# Globals:   BLUE, CYAN, NC (read); OVERALL_HEALTH, ISSUES (reset by "quick")
# Arguments: $1 - full (default) | quick | services | network |
#                 database | watch
# Outputs:   the selected report, or usage text for an unknown action
# Exits:     1 after printing usage for an unknown action. "full" and
#            "quick" end in generate_summary, which exits the script.
#######################################
main() {
  local mode=${1:-full}

  case "$mode" in
    full)
      echo -e "${BLUE}ROA2WEB Comprehensive Health Check${NC}"
      echo -e "${CYAN}$(date)${NC}"
      check_services
      check_disk_space
      check_network
      check_database
      generate_summary
      ;;
    quick)
      echo -e "${BLUE}ROA2WEB Quick Health Check${NC}"
      # Reset status
      OVERALL_HEALTH=true
      ISSUES=()
      check_services
      generate_summary
      ;;
    services) check_services ;;
    network)  check_network ;;
    database) check_database ;;
    watch)    watch_mode ;;
    *)
      cat <<EOF
Usage: $0 {full|quick|services|network|database|watch}

Commands:
 full - Comprehensive health check (default)
 quick - Quick services health check
 services - Check only ROA2WEB services
 network - Check network connectivity
 database - Check database connectivity
 watch - Continuous monitoring mode
EOF
      exit 1
      ;;
  esac
}
#######################################
# Minimal stand-in for `bc` covering the one form this script pipes into
# it: a single line "LHS OP RHS" with a comparison operator.
# Arguments: ignored (accepted so `bc -l` style calls keep working)
# Input:     one line on stdin, e.g. "85.5 > 80"
# Outputs:   1 if the comparison holds, 0 if not (same as bc)
# Returns:   non-zero on empty input or an unsupported operator
#######################################
_bc_fallback() {
  local lhs op rhs
  read -r lhs op rhs || [[ -n "$lhs" ]] || return 1
  awk -v a="$lhs" -v op="$op" -v b="$rhs" 'BEGIN {
    a += 0; b += 0
    if      (op == ">")  r = (a >  b)
    else if (op == ">=") r = (a >= b)
    else if (op == "<")  r = (a <  b)
    else if (op == "<=") r = (a <= b)
    else if (op == "==") r = (a == b)
    else if (op == "!=") r = (a != b)
    else exit 2
    print r
  }'
}

# Make sure bc is available for numeric comparisons
if ! command -v bc &> /dev/null; then
  # Fallback function for numeric comparison without bc.
  # Prints Python's rendering of "$1 $2 $3" (True/False for comparisons),
  # or "false" if python3 fails.
  compare_float() {
    local val1=$1
    local op=$2
    local val2=$3
    python3 -c "print($val1 $op $val2)" 2>/dev/null || echo "false"
  }

  # Provide `bc` as a shell function. An alias cannot be used here: bash
  # does not expand aliases in non-interactive shells, and function bodies
  # that were already parsed would not see a later alias anyway.
  bc() {
    _bc_fallback "$@"
  }
fi
# Entry point: run main() with all command-line arguments passed through unchanged.
main "$@"