Reorganize oracle/ and chatbot/ into proxmox/ per LXC/VM structure

- Move oracle/migration-scripts/ to proxmox/lxc108-oracle/migration/
- Move oracle/roa/ and oracle/roa-romconstruct/ to proxmox/lxc108-oracle/sql/
- Move oracle/standby-server-scripts/ to proxmox/vm109-windows-dr/
- Move chatbot/ to proxmox/lxc104-flowise/
- Update proxmox/README.md with new structure and navigation
- Update all documentation with correct directory references
- Remove unused input/claude-agent-sdk/ files

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Marius
2026-01-27 17:28:53 +02:00
parent 4d51d5b2d2
commit a567f75f25
51 changed files with 233 additions and 1706 deletions

View File

@@ -0,0 +1,649 @@
#!/bin/bash
#
# Oracle DR Weekly Test with Proxmox PVE::Notify
# Automated DR test with notifications via Proxmox notification system
#
# Location: /opt/scripts/weekly-dr-test-proxmox.sh (on Proxmox host)
# Schedule: Add to cron for weekly execution (Saturdays)
#
# This script is SELF-SUFFICIENT:
# - Automatically creates notification templates if they don't exist
# - Uses Proxmox native notification system
# - No email configuration needed - uses existing Proxmox setup
#
# Installation:
# cp weekly-dr-test-proxmox.sh /opt/scripts/
# chmod +x /opt/scripts/weekly-dr-test-proxmox.sh
# /opt/scripts/weekly-dr-test-proxmox.sh --install # Creates templates
# crontab -e # Add: 0 6 * * 6 /opt/scripts/weekly-dr-test-proxmox.sh
#
# Author: Claude (based on ha-monitor.sh pattern)
# Version: 1.0
# Strict mode: abort on unhandled command failures (-e), on expansion of unset
# variables (-u), and treat a pipeline as failed if any stage fails (pipefail).
set -euo pipefail
# Set proper PATH for cron execution
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
# Configuration
# Proxmox VM ID of the DR machine; passed to `qm start`, `qm status`, `qm stop`.
DR_VM_ID="109"
# Address, SSH port and SSH login used for every remote command run on the DR VM.
DR_VM_IP="10.0.20.37"
DR_VM_PORT="22122"
DR_VM_USER="romfast"
# Directory on this host that the pre-flight step searches (non-recursively)
# for *.BKP files.
BACKUP_PATH="/mnt/pve/oracle-backups/ROA/autobackup"
# NOTE(review): not referenced anywhere else in the visible script - presumably
# intended as an upper bound for the restore duration; confirm or remove.
MAX_RESTORE_TIME_MIN=30
# Directory where create_templates writes the notification templates and where
# main looks for the subject template before running the test.
TEMPLATE_DIR="/usr/share/pve-manager/templates/default"
# One log file per run, named after the start timestamp.
LOG_DIR="/var/log/oracle-dr"
LOG_FILE="$LOG_DIR/dr_test_$(date +%Y%m%d_%H%M%S).log"
# Colors (ANSI escape sequences, expanded by `echo -e`; NC resets the colour)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'
# Create log directory (runs on every invocation, including --help/--install)
mkdir -p "$LOG_DIR"
# Install the notification templates for the Oracle DR test.
#
# Writes three Handlebars files into $TEMPLATE_DIR (the directory is created
# when absent; existing files are overwritten):
#   oracle-dr-test-subject.txt.hbs   subject line
#   oracle-dr-test-body.txt.hbs      plain-text body
#   oracle-dr-test-body.html.hbs     HTML body
#
# Globals: TEMPLATE_DIR, GREEN, NC (read)
# Outputs: one progress line before and one after writing, on stdout
create_templates() {
  local tpl_prefix="${TEMPLATE_DIR}/oracle-dr-test"

  echo -e "${GREEN}Creating Oracle DR test notification templates...${NC}"

  # Destination directory may not exist yet
  mkdir -p "${TEMPLATE_DIR}"

  # --- Subject line -------------------------------------------------------
  cat <<'EOF' > "${tpl_prefix}-subject.txt.hbs"
Oracle DR Test {{test_result}} | {{date}}
EOF

  # --- Plain-text body ----------------------------------------------------
  cat <<'EOF' > "${tpl_prefix}-body.txt.hbs"
Oracle DR Test {{test_result}} | {{date}}
Severity: {{severity}}
SUMMARY
- Outcome: {{test_result}}
- Duration: {{total_duration}} min (restore {{restore_duration}} min)
- Backups used: {{backup_count}}
- Tables restored: {{tables_restored}}
COMPONENTS
- VM {{vm_id}} ({{vm_ip}}): {{vm_status}}
- NFS: {{nfs_status}}
- Database: {{database_status}}
- Cleanup: {{disk_freed}} GB freed
STEPS
{{#each test_steps}}
- {{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} - {{this.status}}{{/if}}
{{/each}}
{{#if has_errors}}
ISSUES
{{#each errors}}
- {{this}}
{{/each}}
{{/if}}
{{#if has_warnings}}
WARNINGS
{{#each warnings}}
- {{this}}
{{/each}}
{{/if}}
RMAN RESTORE LOG (complete)
---
{{restore_log}}
---
BASH SCRIPT LOG (last 100 lines)
---
{{bash_log}}
---
Full log: {{log_file}}
Next test: Saturday 06:00
EOF

  # --- HTML body (compact Gmail-friendly layout) ----------------------------
  cat <<'EOF' > "${tpl_prefix}-body.html.hbs"
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Oracle DR Test {{test_result}} | {{date}}</title>
</head>
<body style="margin:0;padding:16px;font-family:Arial,Helvetica,sans-serif;background:#ffffff;color:#2c3e50;">
<table style="width:100%;max-width:640px;margin:0 auto;border-collapse:collapse;">
<tr>
<td style="padding:0 0 12px 0;font-size:18px;font-weight:600;">
Oracle DR Test {{test_result}}
</td>
</tr>
<tr>
<td style="padding:0 0 8px 0;font-size:13px;color:#6c757d;">{{date}} · Severity: {{severity}}</td>
</tr>
<tr>
<td style="padding:12px;border:1px solid #e1e4e8;border-radius:4px;">
<table style="width:100%;border-collapse:collapse;font-size:14px;">
<tr><td style="padding:4px 0;">Outcome</td><td style="padding:4px 0;text-align:right;">{{test_result}}</td></tr>
<tr><td style="padding:4px 0;">Duration</td><td style="padding:4px 0;text-align:right;">{{total_duration}} min (restore {{restore_duration}} min)</td></tr>
<tr><td style="padding:4px 0;">Backups used</td><td style="padding:4px 0;text-align:right;">{{backup_count}}</td></tr>
<tr><td style="padding:4px 0;">Tables restored</td><td style="padding:4px 0;text-align:right;">{{tables_restored}}</td></tr>
</table>
</td>
</tr>
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;">Components</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">VM {{vm_id}} ({{vm_ip}}): {{vm_status}}</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">NFS: {{nfs_status}}</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">Database: {{database_status}}</td></tr>
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">Cleanup: {{disk_freed}} GB freed</td></tr>
</table>
</td>
</tr>
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;">
<tr><td style="padding:0 0 6px 0;font-weight:600;">Steps</td></tr>
{{#each test_steps}}
<tr>
<td style="padding:4px 0;border-bottom:1px solid #f1f1f1;">{{#if this.passed}}✓{{else}}✗{{/if}} {{this.name}} ({{this.duration}}s){{#if this.status}} {{this.status}}{{/if}}</td>
</tr>
{{/each}}
</table>
</td>
</tr>
{{#if has_errors}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff5f5;border:1px solid #f1b0b7;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#c82333;">Issues</td></tr>
{{#each errors}}
<tr><td style="padding:6px 12px;border-top:1px solid #f8d7da;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}
{{#if has_warnings}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff8e5;border:1px solid #ffe8a1;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#856404;">Warnings</td></tr>
{{#each warnings}}
<tr><td style="padding:6px 12px;border-top:1px solid #ffe8a1;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:12px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;font-size:13px;">RMAN Restore Log (complete)</td></tr>
<tr><td style="padding:8px 12px;font-family:monospace;white-space:pre-wrap;word-wrap:break-word;border-top:1px solid #e1e4e8;">{{restore_log}}</td></tr>
</table>
</td>
</tr>
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:12px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;font-size:13px;">Bash Script Log (last 100 lines)</td></tr>
<tr><td style="padding:8px 12px;font-family:monospace;white-space:pre-wrap;word-wrap:break-word;border-top:1px solid #e1e4e8;">{{bash_log}}</td></tr>
</table>
</td>
</tr>
<tr>
<td style="padding:16px 0 0 0;font-size:12px;color:#6c757d;">
Full log: {{log_file}} · Next test: Saturday 06:00
</td>
</tr>
</table>
</body>
</html>
EOF

  echo -e "${GREEN}Templates created successfully in $TEMPLATE_DIR${NC}"
}
# Send a notification through the Proxmox notification system (PVE::Notify).
#
# A small Perl helper is written to a private temporary file (created with
# mktemp, so the name is not predictable), fed the JSON payload on stdin and
# removed again whether or not it succeeded.
#
# Arguments:
#   $1 - severity label. Kept for interface compatibility only: the helper
#        reads the severity from the "severity" key of the JSON payload
#        (defaulting to 'info' there).
#   $2 - JSON object (string) handed to the template renderer
# Outputs: the helper's status/error line on stdout
# Returns: 0 when the helper succeeded; the failing status otherwise
send_pve_notification() {
  local data="$2"
  local helper
  local status=0

  helper=$(mktemp "${TMPDIR:-/tmp}/oracle-dr-notify.XXXXXX") || return 1

  # Write the Perl helper that calls PVE::Notify
  cat > "$helper" <<'PERL_SCRIPT' || { rm -f -- "$helper"; return 1; }
#!/usr/bin/perl
use strict;
use warnings;
use PVE::Notify;
use JSON;
my $json_data = do { local $/; <STDIN> };
my $data = decode_json($json_data);
my $severity = $data->{severity} // 'info';
my $template_name = 'oracle-dr-test';
# Add fields for matching rules
my $fields = {
type => 'oracle-dr-test',
severity => $severity,
test_result => $data->{test_result},
};
# Send notification
eval {
PVE::Notify::notify(
$severity,
$template_name,
$data,
$fields
);
};
if ($@) {
print "Error sending notification: $@\n";
exit 1;
}
print "Notification sent successfully\n";
PERL_SCRIPT

  # Capture the status instead of letting `set -e` abort here, so the
  # temporary file is always removed.
  printf '%s\n' "$data" | perl "$helper" || status=$?

  rm -f -- "$helper"
  return "$status"
}
# Logging functions
# Print a timestamped message to stdout and append the same line to $LOG_FILE.
# Arguments: $1 - message text
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] %s\n' "$stamp" "$1" | tee -a "$LOG_FILE"
}
# Report an error: coloured "[ERROR]" line on stdout, plain timestamped line
# in $LOG_FILE.
# The file copy deliberately carries no ANSI colour codes, because the tail of
# the log file is embedded in the notification body.
# Arguments: $1 - message text
# Globals:   RED, NC, LOG_FILE (read)
log_error() {
  local msg="$1"
  echo -e "${RED}[ERROR]${NC} $msg"
  printf '[%s] [ERROR] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$msg" >> "$LOG_FILE"
}
# Report a warning: coloured "[WARNING]" line on stdout, plain timestamped
# line in $LOG_FILE.
# The file copy deliberately carries no ANSI colour codes, because the tail of
# the log file is embedded in the notification body.
# Arguments: $1 - message text
# Globals:   YELLOW, NC, LOG_FILE (read)
log_warning() {
  local msg="$1"
  echo -e "${YELLOW}[WARNING]${NC} $msg"
  printf '[%s] [WARNING] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$msg" >> "$LOG_FILE"
}
# Report a success: coloured "[SUCCESS]" line on stdout, plain timestamped
# line in $LOG_FILE.
# The file copy deliberately carries no ANSI colour codes, because the tail of
# the log file is embedded in the notification body.
# Arguments: $1 - message text
# Globals:   GREEN, NC, LOG_FILE (read)
log_success() {
  local msg="$1"
  echo -e "${GREEN}[SUCCESS]${NC} $msg"
  printf '[%s] [SUCCESS] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$msg" >> "$LOG_FILE"
}
# Test tracking state, filled in while the test runs
declare -a TEST_STEPS=()       # one JSON object per recorded step (see track_step)
declare -a ERRORS=()           # "<step name>: <status>" for every failed step
declare -a WARNINGS=()         # non-fatal findings appended by run_dr_test
TEST_START_TIME="$(date +%s)"  # epoch seconds at script start
# Record the outcome of one test step.
#
# Arguments:
#   $1 - step name
#   $2 - "true" when the step passed; any other value is stored as not passed
#   $3 - human-readable status text
#   $4 - epoch seconds at which the step started (used to compute the duration)
# Globals:
#   TEST_STEPS - gets a JSON object {name, status, duration, passed} appended
#   ERRORS     - gets "<name>: <status>" appended when $2 is exactly "false"
track_step() {
  local step_name="$1"
  local outcome="$2"
  local detail="$3"
  local began="$4"
  local now elapsed entry

  now=$(date +%s)
  elapsed=$(( now - began ))

  # jq does the quoting/escaping of the free-text fields
  entry=$(
    jq -n \
      --arg name "$step_name" \
      --arg status "$detail" \
      --arg duration "$elapsed" \
      --arg passed "$outcome" \
      '{name:$name, status:$status, duration:($duration|tonumber), passed:($passed == "true")}'
  )
  TEST_STEPS+=("$entry")

  if [[ "$outcome" == "false" ]]; then
    ERRORS+=("${step_name}: ${detail}")
  fi
}
# Print the restore log stored on the DR VM.
# Tries the known locations in order and prints the first non-empty one;
# prints nothing when neither can be read. Always returns 0.
# Globals: DR_VM_PORT, DR_VM_USER, DR_VM_IP (read)
_dr_fetch_restore_log() {
  local remote_path content
  for remote_path in \
    'D:\oracle\logs\restore_from_zero.log' \
    'D:\oracle\temp\restore_from_zero.log'; do
    # `|| true`: an unreachable VM or a missing file just means "no log here"
    content=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
      "powershell -Command \"Get-Content '${remote_path}' -ErrorAction SilentlyContinue\"" 2>/dev/null || true)
    if [[ -n "$content" ]]; then
      printf '%s\n' "$content"
      return 0
    fi
  done
  return 0
}

# Main test workflow.
#
# Steps: (1) check that at least two *.BKP files exist in $BACKUP_PATH,
# (2) start the DR VM and poll until SSH + PowerShell answer, (3) check the
# backup share on the VM, (4) run the restore script, (5) verify the database
# from the captured restore output, (6) run the cleanup script, (7) shut the VM
# down. A notification with the collected results is sent in every case.
#
# Every remote/hypervisor command whose failure must not abort the run is
# guarded explicitly, because the script runs under `set -euo pipefail` and an
# unguarded failure would exit before the VM is stopped and before the
# notification is sent.
#
# Globals: DR_VM_ID, DR_VM_IP, DR_VM_PORT, DR_VM_USER, BACKUP_PATH, LOG_FILE,
#          TEST_START_TIME (read); TEST_STEPS, ERRORS, WARNINGS (appended)
# Returns: 0 when the notification was sent (whatever the test outcome),
#          1 when the payload could not be built or the notification failed
run_dr_test() {
  local test_result="FAILED"
  local severity="error"
  local is_success=false
  local restore_duration=0
  local tables_restored=0
  local db_status="UNKNOWN"
  local nfs_status="Not checked"
  local vm_status_label="Not started"
  local cleanup_freed=0
  local backup_count=0
  local restore_log="Not collected"
  local step_start restore_start restore_end
  local rc=0

  log "=========================================="
  log "Oracle DR Weekly Test - Starting"
  log "=========================================="

  # Step 1: Pre-flight checks
  step_start=$(date +%s)
  log "STEP 1: Pre-flight checks"

  # Check backups exist. `|| true`: with pipefail a missing or unreadable
  # backup directory makes the pipeline fail; that must be reported as
  # "0 backups found", not abort the script.
  backup_count=$(find "$BACKUP_PATH" -maxdepth 1 -type f -name '*.BKP' 2>/dev/null | wc -l) || true
  backup_count=${backup_count//[^0-9]/}
  backup_count=${backup_count:-0}

  if (( backup_count < 2 )); then
    track_step "Pre-flight checks" false "Insufficient backups (found: $backup_count)" "$step_start"
    test_result="FAILED - No backups"
  else
    track_step "Pre-flight checks" true "Found $backup_count backups" "$step_start"

    # Step 2: Start VM
    step_start=$(date +%s)
    log "STEP 2: Starting DR VM"
    if qm start "$DR_VM_ID" 2>/dev/null; then
      vm_status_label="Running"

      # Intelligent VM boot wait with polling (max 180s)
      local MAX_BOOT_WAIT=180
      local POLL_INTERVAL=5
      local boot_elapsed=0
      local vm_ready=false
      local vm_qm_status

      log "Waiting for VM to become ready (SSH + PowerShell, max ${MAX_BOOT_WAIT}s)..."
      while (( boot_elapsed < MAX_BOOT_WAIT )); do
        # Check 1: VM running status in Proxmox
        vm_qm_status=$(qm status "$DR_VM_ID" 2>/dev/null | grep -o "running" || echo "")
        if [[ "$vm_qm_status" == "running" ]]; then
          # Check 2: SSH connectivity and PowerShell availability (what we actually need)
          if ssh -p "$DR_VM_PORT" -o ConnectTimeout=5 -o StrictHostKeyChecking=no -o BatchMode=yes "$DR_VM_USER@$DR_VM_IP" \
            "powershell -Command 'Write-Output ready'" >/dev/null 2>&1; then
            log "VM ready after ${boot_elapsed}s (SSH and PowerShell responding)"
            vm_ready=true
            break
          fi
        fi
        sleep "$POLL_INTERVAL"
        boot_elapsed=$(( boot_elapsed + POLL_INTERVAL ))
        # Progress logging every 30 seconds
        if (( boot_elapsed % 30 == 0 && boot_elapsed < MAX_BOOT_WAIT )); then
          log "Still waiting for VM... (${boot_elapsed}s/${MAX_BOOT_WAIT}s elapsed)"
        fi
      done

      if [[ "$vm_ready" == true ]]; then
        track_step "VM Startup" true "VM $DR_VM_ID started and ready (${boot_elapsed}s)" "$step_start"
      else
        # The VM did start, so the step is not failed - but do not claim it is ready.
        log_warning "VM did not respond within ${MAX_BOOT_WAIT}s, continuing anyway (may cause subsequent failures)"
        WARNINGS+=("VM $DR_VM_ID did not answer over SSH within ${MAX_BOOT_WAIT}s")
        track_step "VM Startup" true "VM $DR_VM_ID started, readiness not confirmed after ${boot_elapsed}s" "$step_start"
      fi

      # Step 3: Verify NFS mount
      step_start=$(date +%s)
      log "STEP 3: Verifying NFS mount"
      nfs_status="Not Mounted"
      # Test-Path prints True/False but exits 0 either way, so the result is
      # turned into an exit status on the remote side.
      if ssh -p "$DR_VM_PORT" -o ConnectTimeout=10 "$DR_VM_USER@$DR_VM_IP" \
        "powershell -Command \"if (Test-Path 'F:\\ROA\\autobackup') { exit 0 } else { exit 1 }\"" >/dev/null 2>&1; then
        nfs_status="Mounted"
        track_step "NFS Mount Check" true "F:\\ drive accessible" "$step_start"
      else
        track_step "NFS Mount Check" false "F:\\ drive not accessible" "$step_start"
        WARNINGS+=("NFS mount may need manual intervention")
      fi

      # Step 4: Run restore
      step_start=$(date +%s)
      restore_start=$step_start
      log "STEP 4: Running database restore"
      # pipefail makes the `if` see the ssh/restore status, not tee's
      if ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
        "powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\rman_restore_from_zero.ps1 -TestMode" 2>&1 | tee -a "$LOG_FILE"; then
        restore_end=$(date +%s)
        restore_duration=$(( (restore_end - restore_start) / 60 ))
        track_step "Database Restore" true "Restored in $restore_duration minutes" "$step_start"

        # Step 5: Verify database
        step_start=$(date +%s)
        log "STEP 5: Verifying database"
        # Parse database status from LOG_FILE (the restore output was captured there)
        # Look for "OPEN_MODE: READ WRITE" in the captured output
        if grep -q "OPEN_MODE: READ WRITE" "$LOG_FILE" 2>/dev/null; then
          db_status="READ WRITE"
        else
          db_status=""
        fi
        # Parse table count from LOG_FILE: last "TABLES: <number>" in the output
        tables_restored=$(grep -oP 'TABLES:\s*\K\d+' "$LOG_FILE" 2>/dev/null | tail -n 1) || true
        tables_restored=${tables_restored//[^0-9]/}
        # 10# forces base 10 so a value with leading zeros stays a valid number
        tables_restored=$(( 10#${tables_restored:-0} ))

        if [[ "$db_status" == "READ WRITE" ]] && (( tables_restored > 0 )); then
          track_step "Database Verification" true "Database OPEN, $tables_restored tables" "$step_start"
          test_result="PASSED"
          severity="info"
          is_success=true
        else
          track_step "Database Verification" false "Database not OPEN" "$step_start"
        fi

        # Collect restore log from VM (always attempt collection - FULL log)
        log "Collecting restore log from DR VM..."
        restore_log=$(_dr_fetch_restore_log)
        if [[ -z "$restore_log" ]]; then
          restore_log="Restore log not available (file may not exist or was not generated)"
        fi

        # Step 6: Cleanup (AFTER restore - stop service to release file locks)
        step_start=$(date +%s)
        log "STEP 6: Running cleanup"
        if ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
          "powershell -ExecutionPolicy Bypass -File D:\\oracle\\scripts\\cleanup_database.ps1 /SILENT /AFTER" 2>/dev/null; then
          # Fixed estimate; the freed space is not measured
          cleanup_freed=8
          track_step "Cleanup" true "Database cleaned, ~${cleanup_freed}GB freed" "$step_start"
        else
          track_step "Cleanup" false "Cleanup script failed" "$step_start"
          WARNINGS+=("Cleanup failed on DR VM; disk space may not have been released")
        fi
      else
        # Collect restore log even when restore fails (FULL log)
        log "Collecting restore log after failure..."
        restore_log=$(_dr_fetch_restore_log)
        # Always try to get some error output from RMAN script
        if [[ -z "$restore_log" ]]; then
          local last_error
          last_error=$(ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" \
            "powershell -Command \"Get-Content 'D:\\oracle\\temp\\*.rman' -Tail 20 -ErrorAction SilentlyContinue\"" 2>/dev/null || true)
          if [[ -n "$last_error" ]]; then
            # $'\n' is a real newline; "\n" inside double quotes is not
            restore_log="RMAN script content (last 20 lines):"$'\n'"$last_error"
          else
            restore_log="No restore logs or RMAN scripts found"
          fi
        fi
        track_step "Database Restore" false "Restore failed" "$step_start"
      fi

      # Step 7: Shutdown VM
      step_start=$(date +%s)
      log "STEP 7: Shutting down VM"
      # Best effort: if the guest cannot be asked to shut down, `qm stop`
      # below still powers it off.
      ssh -p "$DR_VM_PORT" "$DR_VM_USER@$DR_VM_IP" "shutdown /s /t 30" 2>/dev/null \
        || log_warning "Graceful shutdown request failed, VM will be stopped via qm"
      sleep 60
      qm stop "$DR_VM_ID" 2>/dev/null || true

      # Report what Proxmox says, rather than assuming the stop worked
      local vm_final_state
      vm_final_state=$(qm status "$DR_VM_ID" 2>/dev/null || true)
      if [[ "$vm_final_state" == *stopped* ]]; then
        track_step "VM Shutdown" true "VM stopped" "$step_start"
        vm_status_label="Stopped"
      else
        track_step "VM Shutdown" false "VM $DR_VM_ID could not be confirmed stopped" "$step_start"
        vm_status_label="Stop not confirmed"
      fi
    else
      track_step "VM Startup" false "Failed to start VM $DR_VM_ID" "$step_start"
      vm_status_label="Failed to start"
    fi
  fi

  # Calculate total duration
  local test_end_time total_duration
  test_end_time=$(date +%s)
  total_duration=$(( (test_end_time - TEST_START_TIME) / 60 ))

  # Prepare notification data
  local steps_json='[]' errors_json='[]' warnings_json='[]'
  local has_errors=false has_warnings=false
  if (( ${#TEST_STEPS[@]} > 0 )); then
    steps_json=$(printf '%s\n' "${TEST_STEPS[@]}" | jq -s '.')
  fi
  if (( ${#ERRORS[@]} > 0 )); then
    has_errors=true
    errors_json=$(printf '%s\n' "${ERRORS[@]}" | jq -R . | jq -s .)
  fi
  if (( ${#WARNINGS[@]} > 0 )); then
    has_warnings=true
    warnings_json=$(printf '%s\n' "${WARNINGS[@]}" | jq -R . | jq -s .)
  fi

  if [[ "$is_success" == true && "$has_warnings" == true ]]; then
    severity="warning"
  fi

  # Collect last 100 lines of bash script log
  local bash_log
  bash_log=$(tail -n 100 "$LOG_FILE" 2>/dev/null || echo "Bash log not available")

  # Build the payload with jq so every string is escaped correctly. The
  # (potentially large) restore log goes through stdin rather than argv to
  # stay clear of the per-argument size limit.
  local json_data
  if ! json_data=$(
    printf '%s\n' "$restore_log" | jq -Rs \
      --arg severity "$severity" \
      --arg test_result "$test_result" \
      --arg date "$(date '+%Y-%m-%d %H:%M:%S')" \
      --argjson total_duration "$total_duration" \
      --argjson is_success "$is_success" \
      --argjson has_errors "$has_errors" \
      --argjson has_warnings "$has_warnings" \
      --argjson test_steps "$steps_json" \
      --argjson errors "$errors_json" \
      --argjson warnings "$warnings_json" \
      --argjson backup_count "$backup_count" \
      --argjson restore_duration "$restore_duration" \
      --argjson tables_restored "$tables_restored" \
      --arg database_status "${db_status:-UNKNOWN}" \
      --argjson disk_freed "$cleanup_freed" \
      --arg vm_id "$DR_VM_ID" \
      --arg vm_ip "$DR_VM_IP" \
      --arg vm_status "$vm_status_label" \
      --arg nfs_status "${nfs_status:-Unknown}" \
      --arg log_file "$LOG_FILE" \
      --arg bash_log "${bash_log}"$'\n' \
      '{
        severity: $severity,
        test_result: $test_result,
        date: $date,
        total_duration: $total_duration,
        is_success: $is_success,
        has_errors: $has_errors,
        has_warnings: $has_warnings,
        test_steps: $test_steps,
        errors: $errors,
        warnings: $warnings,
        backup_count: $backup_count,
        restore_duration: $restore_duration,
        tables_restored: $tables_restored,
        database_status: $database_status,
        disk_freed: $disk_freed,
        vm_id: $vm_id,
        vm_ip: $vm_ip,
        vm_status: $vm_status,
        nfs_status: $nfs_status,
        log_file: $log_file,
        restore_log: .,
        bash_log: $bash_log
      }'
  ); then
    log_error "Could not build notification payload"
    return 1
  fi

  # Send notification
  log "Sending notification..."
  if ! send_pve_notification "$severity" "$json_data"; then
    log_error "Notification could not be sent"
    rc=1
  fi

  # Final summary
  log "=========================================="
  log "Oracle DR Test Complete: $test_result"
  log "Duration: $total_duration minutes"
  log "Log: $LOG_FILE"
  log "=========================================="

  return "$rc"
}
# Main execution: dispatch on the first command-line argument.
#
#   (no argument)  run the DR test; the notification templates are created
#                  first when the subject template is missing
#   --install      create the notification templates and print next steps
#   --help, -h     print usage
#
# Any other argument is rejected with status 2. The DR test starts a VM and
# runs a database restore, so it must never be triggered by a mistyped option.
#
# Arguments: $1 - optional mode flag (see above)
# Returns:   0 on success, 2 on an unknown option, otherwise the status of
#            the dispatched function
main() {
  case "${1:-}" in
    --install)
      create_templates
      echo ""
      echo -e "${GREEN}Installation complete!${NC}"
      echo "Next steps:"
      echo "1. Test the script: /opt/scripts/weekly-dr-test-proxmox.sh"
      echo "2. Add to cron: crontab -e"
      echo " Add line: 0 6 * * 6 /opt/scripts/weekly-dr-test-proxmox.sh"
      echo "3. Configure notifications in Proxmox GUI if needed:"
      echo " Datacenter > Notifications > Add matching rules for 'oracle-dr-test'"
      ;;
    -h|--help)
      echo "Oracle DR Weekly Test for Proxmox"
      echo "Usage:"
      echo " $0 - Run DR test"
      echo " $0 --install - Create notification templates"
      echo " $0 --help - Show this help"
      ;;
    "")
      # Check if templates exist, create if missing
      if [[ ! -f "$TEMPLATE_DIR/oracle-dr-test-subject.txt.hbs" ]]; then
        echo -e "${YELLOW}Templates not found, creating...${NC}"
        create_templates
        echo ""
      fi
      # Run DR test
      run_dr_test
      ;;
    *)
      echo -e "${RED}Error: unknown option '$1'${NC}" >&2
      echo "Run '$0 --help' for usage." >&2
      return 2
      ;;
  esac
}
# Check dependencies: jq is used to build and escape the JSON for the
# notification, so refuse to start without it.
command -v jq >/dev/null 2>&1 || {
  echo -e "${RED}Error: jq is not installed${NC}"
  echo "Install with: apt-get install jq"
  exit 1
}

# Hand the command-line arguments to the dispatcher
main "$@"