Files
ROMFASTSQL/oracle/standby-server-scripts/oracle-backup-monitor-proxmox.sh
Marius b50cc2b8c4 Oracle DR: Fix backup retention and monitoring for new naming convention
Problem: Backups accumulated on DR (73 files, 4 days) instead of keeping only 2 days
- transfer_incremental.ps1 had no cleanup function (ran 2x/day without cleanup)
- transfer_to_dr.ps1 cleanup had poor logging
- oracle-backup-monitor-proxmox.sh couldn't detect new L0/L1 backup format

Changes:
- Add cleanup to transfer_incremental.ps1 (delete backups older than 2 days)
- Improve cleanup logging in transfer_to_dr.ps1 (shows count before/after)
- Update oracle-backup-monitor-proxmox.sh to detect both naming conventions:
  * Old: *FULL*.BKP, *INCR*.BKP
  * New: L0_*.BKP (Level 0), L1_*.BKP (Level 1)
- Remove temporary files from /input/ directory

Result: Monitor now correctly reports backup age, cleanup runs after each transfer

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-14 18:05:11 +03:00

512 lines
18 KiB
Bash

#!/bin/bash
#
# Oracle Backup Monitor for Proxmox with PVE::Notify
# Monitors Oracle backups and sends notifications via Proxmox notification system
#
# Location: /opt/scripts/oracle-backup-monitor-proxmox.sh (on Proxmox host)
# Schedule: Add to cron for daily execution
#
# This script is SELF-SUFFICIENT:
# - Automatically creates notification templates if they don't exist
# - Uses Proxmox native notification system (same as HA alerts)
# - No email configuration needed - uses existing Proxmox setup
#
# Installation:
# cp oracle-backup-monitor-proxmox.sh /opt/scripts/
# chmod +x /opt/scripts/oracle-backup-monitor-proxmox.sh
# /opt/scripts/oracle-backup-monitor-proxmox.sh --install # Creates templates
# crontab -e # Add: 0 9 * * * /opt/scripts/oracle-backup-monitor-proxmox.sh
#
# Author: Claude (based on ha-monitor.sh pattern)
# Version: 1.0
# Strict mode: abort on a failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# --- Primary server connection details ---
# (not referenced anywhere in the visible part of this script)
PRIMARY_HOST='10.0.20.36'
PRIMARY_PORT='22122'
PRIMARY_USER='Administrator'

# --- Backup location and freshness limits ---
# Directory scanned for *.BKP files
BACKUP_PATH='/mnt/pve/oracle-backups/ROA/autobackup'
# Newest FULL backup older than this many hours raises a warning
MAX_FULL_AGE_HOURS='25'
# Newest incremental backup older than this many hours raises a warning
MAX_CUMULATIVE_AGE_HOURS='7'

# --- Directory the notification templates are written to ---
TEMPLATE_DIR='/usr/share/pve-manager/templates/default'

# --- ANSI colour sequences (expanded by `echo -e`) ---
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # reset
# Write the three notification templates (subject, plain-text body, HTML body)
# into $TEMPLATE_DIR, creating the directory first if necessary.
# Globals: TEMPLATE_DIR, GREEN, NC (all read)
# Outputs: a progress line before and a confirmation line after writing.
# Files that already exist are overwritten.
create_templates() {
  local tpl_prefix="$TEMPLATE_DIR/oracle-backup"

  echo -e "${GREEN}Creating Oracle backup notification templates...${NC}"

  # The target directory may not exist yet
  mkdir -p "$TEMPLATE_DIR"

  # Subject line
  cat <<'SUBJECT_TEMPLATE' > "${tpl_prefix}-subject.txt.hbs"
Oracle Backup {{status}} | {{node}}
SUBJECT_TEMPLATE

  # Plain-text body
  cat <<'TEXT_TEMPLATE' > "${tpl_prefix}-body.txt.hbs"
Oracle Backup {{status}} | {{node}}
Date: {{date}}
SUMMARY
- Full backup: {{full_backup_age}}h (limit {{full_backup_limit}}h) -> {{#if full_backup_ok}}OK{{else}}CHECK{{/if}}
- Incremental: {{cumulative_backup_age}}h (limit {{cumulative_backup_limit}}h) -> {{#if cumulative_backup_ok}}OK{{else}}CHECK{{/if}}
- Backups: {{total_backups}} files ({{total_size_label}})
- Disk usage: {{disk_usage}}%
{{#if has_errors}}
ISSUES
{{#each errors}}
- {{this}}
{{/each}}
{{/if}}
{{#if has_warnings}}
WARNINGS
{{#each warnings}}
- {{this}}
{{/each}}
{{/if}}
FULL BACKUPS ({{full_backup_count}} files)
{{#if has_full_backups}}
{{#each full_backup_list}}
- {{this}}
{{/each}}
{{else}}
- none detected
{{/if}}
INCREMENTAL BACKUPS ({{incr_backup_count}} files)
{{#if has_incr_backups}}
{{#each incr_backup_list}}
- {{this}}
{{/each}}
{{else}}
- none detected
{{/if}}
Next check: +24h via Proxmox Monitor
TEXT_TEMPLATE

  # HTML body (kept lightweight so it renders well in Gmail)
  cat <<'HTML_TEMPLATE' > "${tpl_prefix}-body.html.hbs"
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Oracle Backup {{status}} | {{node}}</title>
</head>
<body style="margin:0;padding:16px;font-family:Arial,Helvetica,sans-serif;background:#ffffff;color:#2c3e50;">
<table style="width:100%;max-width:640px;margin:0 auto;border-collapse:collapse;">
<tr>
<td style="padding:0 0 12px 0;font-size:18px;font-weight:600;">
Oracle Backup {{status}} | {{node}}
</td>
</tr>
<tr>
<td style="padding:0 0 16px 0;font-size:13px;color:#6c757d;">
{{date}}
</td>
</tr>
<tr>
<td style="padding:12px;border:1px solid #e1e4e8;border-radius:4px;">
<table style="width:100%;border-collapse:collapse;font-size:14px;">
<tr>
<td style="padding:4px 0;">Full backup</td>
<td style="padding:4px 0;text-align:right;">
{{full_backup_age}}h / {{full_backup_limit}}h · {{#if full_backup_ok}}OK{{else}}CHECK{{/if}}
</td>
</tr>
<tr>
<td style="padding:4px 0;">Incremental</td>
<td style="padding:4px 0;text-align:right;">
{{cumulative_backup_age}}h / {{cumulative_backup_limit}}h · {{#if cumulative_backup_ok}}OK{{else}}CHECK{{/if}}
</td>
</tr>
<tr>
<td style="padding:4px 0;">Backups</td>
<td style="padding:4px 0;text-align:right;">{{total_backups}} files ({{total_size_label}})</td>
</tr>
<tr>
<td style="padding:4px 0;">Disk usage</td>
<td style="padding:4px 0;text-align:right;">{{disk_usage}}%</td>
</tr>
</table>
</td>
</tr>
{{#if has_errors}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff5f5;border:1px solid #f1b0b7;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#c82333;">Issues</td></tr>
{{#each errors}}
<tr><td style="padding:6px 12px;border-top:1px solid #f8d7da;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}
{{#if has_warnings}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:14px;background:#fff8e5;border:1px solid #ffe8a1;border-radius:4px;">
<tr><td style="padding:8px 12px;font-weight:600;color:#856404;">Warnings</td></tr>
{{#each warnings}}
<tr><td style="padding:6px 12px;border-top:1px solid #ffe8a1;">• {{this}}</td></tr>
{{/each}}
</table>
</td>
</tr>
{{/if}}
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:13px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;">FULL Backups ({{full_backup_count}} files)</td></tr>
{{#if has_full_backups}}
{{#each full_backup_list}}
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">• {{this}}</td></tr>
{{/each}}
{{else}}
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">• none detected</td></tr>
{{/if}}
</table>
</td>
</tr>
<tr>
<td style="padding:16px 0 0 0;">
<table style="width:100%;border-collapse:collapse;font-size:13px;border:1px solid #e1e4e8;border-radius:4px;background:#f9fafb;">
<tr><td style="padding:8px 12px;font-weight:600;">INCREMENTAL Backups ({{incr_backup_count}} files)</td></tr>
{{#if has_incr_backups}}
{{#each incr_backup_list}}
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">• {{this}}</td></tr>
{{/each}}
{{else}}
<tr><td style="padding:6px 12px;border-top:1px solid #e1e4e8;">• none detected</td></tr>
{{/if}}
</table>
</td>
</tr>
<tr>
<td style="padding:16px 0 0 0;font-size:12px;color:#6c757d;">
Next automated check: +24h via Proxmox Monitor
</td>
</tr>
</table>
</body>
</html>
HTML_TEMPLATE

  echo -e "${GREEN}Templates created successfully in $TEMPLATE_DIR${NC}"
}
# Send a notification through PVE::Notify using a small Perl helper.
#
# Arguments:
#   $1 - severity. Accepted for interface compatibility only: the Perl helper
#        takes the severity from the "severity" key of the JSON payload
#        (falling back to 'info').
#   $2 - status label. Accepted for interface compatibility only; unused here.
#   $3 - JSON document fed to the helper on stdin. It is passed to
#        PVE::Notify::notify as the template data for 'oracle-backup'.
# Outputs: whatever the Perl helper prints.
# Returns: 0 on success, otherwise the non-zero status of the failing step
#          (temp-file creation, writing the helper, or perl itself).
send_pve_notification() {
  local data="$3"
  local helper_script
  local rc=0

  # Use an unpredictable, private temp file rather than a fixed name in /tmp,
  # so another local user cannot pre-create or symlink the path.
  if ! helper_script=$(mktemp "${TMPDIR:-/tmp}/oracle-notify.XXXXXX"); then
    echo "Error: unable to create temporary file for notification helper" >&2
    return 1
  fi

  cat > "$helper_script" <<'PERL_SCRIPT' || rc=$?
#!/usr/bin/perl
use strict;
use warnings;
use PVE::Notify;
use JSON;
my $json_data = do { local $/; <STDIN> };
my $data = decode_json($json_data);
my $severity = $data->{severity} // 'info';
my $template_name = 'oracle-backup';
# Add fields for matching rules
my $fields = {
type => 'oracle-backup',
severity => $severity,
hostname => $data->{node} // 'unknown',
};
# Send notification
eval {
PVE::Notify::notify(
$severity,
$template_name,
$data,
$fields
);
};
if ($@) {
print "Error sending notification: $@\n";
exit 1;
}
print "Notification sent successfully\n";
PERL_SCRIPT

  if [ "$rc" -eq 0 ]; then
    # Capture the status instead of letting `set -e` abort here, so the
    # helper file is always removed and the caller sees perl's exit code.
    printf '%s\n' "$data" | perl "$helper_script" || rc=$?
  fi

  rm -f -- "$helper_script"
  return "$rc"
}
# List regular files directly inside a directory whose names match any of the
# given patterns, newest first.
# Arguments: $1 - directory; $2... - one or more `find -name` patterns
# Outputs:   one path per line, ordered by modification time (newest first)
_list_backups_newest_first() {
  local dir="$1"
  shift
  local -a name_expr=()
  local pattern
  for pattern in "$@"; do
    if [ "${#name_expr[@]}" -gt 0 ]; then
      name_expr+=(-o)
    fi
    name_expr+=(-name "$pattern")
  done
  find "$dir" -maxdepth 1 -type f \( "${name_expr[@]}" \) -printf '%T@ %p\n' \
    | sort -nr \
    | cut -d' ' -f2-
}

# Print one report line for a backup file: "<mtime> | <file name> | <size>".
# Arguments: $1 - path to the backup file
# "N/A" is used for the time or size when it cannot be determined.
_format_backup_entry() {
  local file="$1"
  local mtime size
  mtime=$(date -r "$file" '+%Y-%m-%d %H:%M' 2>/dev/null) || mtime="N/A"
  size=$(du -sh -- "$file" 2>/dev/null | cut -f1) || size=""
  [ -n "$size" ] || size="N/A"
  printf '%s | %s | %s\n' "$mtime" "${file##*/}" "$size"
}

# Encode the arguments as a JSON array of strings (one element per argument).
# Prints [] when called without arguments.
_json_string_array() {
  if [ "$#" -eq 0 ]; then
    printf '%s\n' '[]'
    return 0
  fi
  printf '%s\n' "$@" | jq -R . | jq -s .
}

# Inspect $BACKUP_PATH, print a summary, and send a notification when the
# overall status is not OK.
#
# Checks performed:
#   - the backup directory exists and contains *.BKP files
#   - the newest FULL backup (*FULL*.BKP or L0_*.BKP) is at most
#     MAX_FULL_AGE_HOURS old (older -> warning, none at all -> error)
#   - the newest incremental backup (*INCR*.BKP, *INCREMENTAL*.BKP,
#     *CUMULATIVE*.BKP or L1_*.BKP) is at most MAX_CUMULATIVE_AGE_HOURS old
#   - disk usage of the backup filesystem (>80% warning, >90% error)
#
# Globals: BACKUP_PATH, MAX_FULL_AGE_HOURS, MAX_CUMULATIVE_AGE_HOURS,
#          GREEN, YELLOW, NC (all read)
# Outputs: progress and summary lines on stdout
# Returns: 0, or the non-zero status of send_pve_notification if sending the
#          notification failed (the summary is still printed in that case)
check_backups() {
  local status="OK"
  local -a errors=()
  local -a warnings=()
  local total_backups=0
  local total_size_label="0G"
  local full_age_hours="N/A"
  local cumulative_age_hours="N/A"
  local full_backup_ok=false
  local cumulative_backup_ok=false
  local disk_usage=0
  # Declared up front: the reporting section below takes the length of these
  # arrays on every code path, which is an "unbound variable" error under
  # `set -u` if they were only declared inside the "files exist" branch.
  local -a full_backups=()
  local -a incr_backups=()
  local -a full_backup_entries=()
  local -a incr_backup_entries=()
  local now file_mtime backup_file total_size disk_usage_raw

  echo "Checking Oracle backups..."

  if [ ! -d "$BACKUP_PATH" ]; then
    status="ERROR"
    errors+=("Backup path $BACKUP_PATH not accessible")
  else
    now=$(date +%s)

    # `|| true`: a partial find failure must not abort the monitor under
    # `set -e`/pipefail; the count produced so far is still used.
    total_backups=$(find "$BACKUP_PATH" -maxdepth 1 -type f -name '*.BKP' 2>/dev/null | wc -l) || true
    total_backups=${total_backups//[[:space:]]/}
    [ -n "$total_backups" ] || total_backups=0

    if [ "$total_backups" -gt 0 ]; then
      total_size=$(du -shc -- "$BACKUP_PATH"/*.BKP 2>/dev/null | tail -1 | awk '{print $1}') || true
      [ -n "$total_size" ] || total_size="0G"
      total_size_label="$total_size"

      # FULL backups - old naming: *FULL*.BKP, new naming: L0_*.BKP
      readarray -t full_backups < <(
        _list_backups_newest_first "$BACKUP_PATH" '*FULL*.BKP' 'L0_*.BKP'
      )
      # Incremental backups - old naming: *INCR*.BKP, *INCREMENTAL*.BKP,
      # *CUMULATIVE*.BKP; new naming: L1_*.BKP
      readarray -t incr_backups < <(
        _list_backups_newest_first "$BACKUP_PATH" \
          '*INCR*.BKP' '*INCREMENTAL*.BKP' '*CUMULATIVE*.BKP' 'L1_*.BKP'
      )

      # Age of the newest FULL backup (lists are sorted newest first)
      if [ "${#full_backups[@]}" -gt 0 ] && [ -n "${full_backups[0]}" ]; then
        file_mtime=$(stat -c %Y -- "${full_backups[0]}")
        full_age_hours=$(( (now - file_mtime) / 3600 ))
        if [ "$full_age_hours" -gt "$MAX_FULL_AGE_HOURS" ]; then
          status="WARNING"
          warnings+=("FULL backup is $full_age_hours hours old (threshold: $MAX_FULL_AGE_HOURS)")
        else
          full_backup_ok=true
        fi
      else
        status="ERROR"
        errors+=("No FULL backup found")
      fi

      # Age of the newest incremental backup.
      # NOTE(review): when no incremental backup exists at all, nothing is
      # reported and the status is unaffected (the template shows "N/A" /
      # CHECK) - confirm this is intended.
      if [ "${#incr_backups[@]}" -gt 0 ] && [ -n "${incr_backups[0]}" ]; then
        file_mtime=$(stat -c %Y -- "${incr_backups[0]}")
        cumulative_age_hours=$(( (now - file_mtime) / 3600 ))
        if [ "$cumulative_age_hours" -gt "$MAX_CUMULATIVE_AGE_HOURS" ]; then
          if [ "$status" != "ERROR" ]; then
            status="WARNING"
          fi
          warnings+=("CUMULATIVE backup is $cumulative_age_hours hours old (threshold: $MAX_CUMULATIVE_AGE_HOURS)")
        else
          cumulative_backup_ok=true
        fi
      fi

      # Per-file report lines. The ${arr[@]+...} form expands to nothing for
      # an empty array without tripping `set -u` on older bash versions.
      for backup_file in ${full_backups[@]+"${full_backups[@]}"}; do
        [ -n "$backup_file" ] || continue
        full_backup_entries+=("$(_format_backup_entry "$backup_file")")
      done
      for backup_file in ${incr_backups[@]+"${incr_backups[@]}"}; do
        [ -n "$backup_file" ] || continue
        incr_backup_entries+=("$(_format_backup_entry "$backup_file")")
      done
    else
      status="ERROR"
      errors+=("No backup files found in $BACKUP_PATH")
    fi

    # -P keeps each filesystem on a single line so column 5 is always the
    # usage percentage, even with long device names.
    disk_usage_raw=$(df -P "$BACKUP_PATH" 2>/dev/null | tail -1 | awk '{print int($5)}') || true
    if [[ "$disk_usage_raw" =~ ^[0-9]+$ ]]; then
      disk_usage="$disk_usage_raw"
    else
      if [ "$status" = "OK" ]; then
        status="WARNING"
      fi
      warnings+=("Unable to determine disk usage for $BACKUP_PATH")
    fi
  fi

  if [ "$disk_usage" -gt 90 ]; then
    status="ERROR"
    errors+=("Disk usage critical: ${disk_usage}%")
  elif [ "$disk_usage" -gt 80 ]; then
    if [ "$status" != "ERROR" ]; then
      status="WARNING"
    fi
    warnings+=("Disk usage high: ${disk_usage}%")
  fi

  local severity is_error=false is_warning=false
  case "$status" in
    ERROR)
      severity="error"
      is_error=true
      ;;
    WARNING)
      severity="warning"
      is_warning=true
      ;;
    *)
      severity="info"
      ;;
  esac

  local errors_json warnings_json full_backup_list_json incr_backup_list_json
  errors_json=$(_json_string_array ${errors[@]+"${errors[@]}"})
  warnings_json=$(_json_string_array ${warnings[@]+"${warnings[@]}"})
  full_backup_list_json=$(_json_string_array ${full_backup_entries[@]+"${full_backup_entries[@]}"})
  incr_backup_list_json=$(_json_string_array ${incr_backup_entries[@]+"${incr_backup_entries[@]}"})

  local has_errors=false
  local has_warnings=false
  local has_full_backups=false
  local has_incr_backups=false
  if [ "${#errors[@]}" -gt 0 ]; then has_errors=true; fi
  if [ "${#warnings[@]}" -gt 0 ]; then has_warnings=true; fi
  if [ "${#full_backup_entries[@]}" -gt 0 ]; then has_full_backups=true; fi
  if [ "${#incr_backup_entries[@]}" -gt 0 ]; then has_incr_backups=true; fi

  local node report_date json_data
  node=$(hostname 2>/dev/null) || node="unknown"
  report_date=$(date +'%Y-%m-%d %H:%M:%S')

  # Payload handed to the notification templates
  json_data=$(cat <<JSON
{
  "severity": "$severity",
  "node": "$node",
  "date": "$report_date",
  "status": "$status",
  "errors": $errors_json,
  "warnings": $warnings_json,
  "has_errors": $has_errors,
  "has_warnings": $has_warnings,
  "total_backups": $total_backups,
  "total_size_gb": "${total_size_label%G}",
  "total_size_label": "$total_size_label",
  "full_backup_age": "${full_age_hours}",
  "cumulative_backup_age": "${cumulative_age_hours}",
  "disk_usage": "${disk_usage}",
  "full_backup_ok": $full_backup_ok,
  "cumulative_backup_ok": $cumulative_backup_ok,
  "is_error": $is_error,
  "is_warning": $is_warning,
  "full_backup_list": $full_backup_list_json,
  "incr_backup_list": $incr_backup_list_json,
  "has_full_backups": $has_full_backups,
  "has_incr_backups": $has_incr_backups,
  "full_backup_count": ${#full_backup_entries[@]},
  "incr_backup_count": ${#incr_backup_entries[@]},
  "full_backup_limit": "$MAX_FULL_AGE_HOURS",
  "cumulative_backup_limit": "$MAX_CUMULATIVE_AGE_HOURS"
}
JSON
)

  local notify_rc=0
  if [ "$status" != "OK" ]; then
    echo -e "${YELLOW}Issues detected, sending notification...${NC}"
    # Keep going on failure so the summary below is still printed; the
    # failure is reported through the return status.
    send_pve_notification "$severity" "$status" "$json_data" || notify_rc=$?
  else
    echo -e "${GREEN}All backups are healthy${NC}"
    # Optionally send success notification (uncomment if desired)
    # send_pve_notification "info" "$status" "$json_data"
  fi

  echo "Status: $status"
  echo "Total backups: $total_backups"
  echo "Total size: $total_size_label"
  echo "FULL backup age: $full_age_hours hours"
  echo "CUMULATIVE backup age: $cumulative_age_hours hours"
  echo "Disk usage: ${disk_usage}%"

  return "$notify_rc"
}
# Entry point: dispatch on the first command-line argument.
#   --install  write the notification templates and print the follow-up steps
#   --help     print usage
#   otherwise  (including no argument) create the templates if the subject
#              template is missing, then run the backup check
main() {
  local action="${1:-}"

  if [ "$action" = "--install" ]; then
    create_templates
    echo ""
    echo -e "${GREEN}Installation complete!${NC}"
    cat <<'NEXT_STEPS'
Next steps:
1. Test the monitor: /opt/scripts/oracle-backup-monitor-proxmox.sh
2. Add to cron: crontab -e
 Add line: 0 9 * * * /opt/scripts/oracle-backup-monitor-proxmox.sh
3. Configure notifications in Proxmox GUI if needed:
 Datacenter > Notifications > Add matching rules for 'oracle-backup'
NEXT_STEPS
  elif [ "$action" = "--help" ]; then
    # Unquoted delimiter on purpose: $0 is expanded to the script name
    cat <<USAGE
Oracle Backup Monitor for Proxmox
Usage:
 $0 - Check backups and send alerts if issues found
 $0 --install - Create notification templates
 $0 --help - Show this help
USAGE
  else
    # Self-heal: the subject template doubles as the "templates installed" marker
    if [ ! -f "$TEMPLATE_DIR/oracle-backup-subject.txt.hbs" ]; then
      echo -e "${YELLOW}Templates not found, creating...${NC}"
      create_templates
      echo ""
    fi
    # Perform the actual backup check
    check_backups
  fi
}
# Refuse to run without jq, which the backup check uses to build the JSON
# arrays in the notification payload.
command -v jq > /dev/null 2>&1 || {
  echo -e "${RED}Error: jq is not installed${NC}"
  echo "Install with: apt-get install jq"
  exit 1
}

# Hand all command-line arguments to the dispatcher
main "$@"