#!/bin/bash
#
# Replicate rpool/oracle-backups from pveelite (active NFS server) to
# pvemini (standby) every 15 minutes via incremental zfs send/recv.
#
# Why: NFS storage on pveelite is the single point that the DR test and
# the daily SCP transfers from primary Oracle Windows depend on. With
# 15-min ZFS replicas, pvemini can take over within minutes if pveelite
# becomes unreachable (run /opt/scripts/failover-dr-to-pvemini.sh).
#
# Why not pvesr or pve-zsync:
#   * pvesr only replicates VM/CT disks, not arbitrary datasets.
#   * pve-zsync would add a package dependency for one job. zfs send
#     over SSH is the simplest mechanism that fits the rest of the
#     cluster's replication patterns.
#
# Schedule: */15 * * * * via cron on pveelite.
#
# Initial sync (one-time, manual). NOTE(review): the snapshot name and
# target host were lost from the original header; <target> below is the
# TARGET_HOST IP configured in this script — confirm before reuse:
#   zfs snapshot rpool/oracle-backups@init
#   zfs send rpool/oracle-backups@init | ssh root@<target> \
#     'zfs recv -F rpool/oracle-backups && zfs set readonly=on rpool/oracle-backups'
#   ssh root@<target> 'zfs set mountpoint=/mnt/pve/oracle-backups rpool/oracle-backups'

set -euo pipefail
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

readonly DATASET="rpool/oracle-backups"
readonly TARGET_HOST="10.0.20.201"  # pvemini direct IP (avoids tailscale magicDNS detour)
readonly SNAP_PREFIX="repl"
readonly KEEP_SNAPS=5               # rolling history on source side
readonly LOCK="/var/run/zfs-replicate-oracle-backups.lock"
readonly LOG="/var/log/oracle-dr/replication.log"

# ssh options as an array so every word survives quoting (SC2086).
readonly SSH_OPTS=(
  -o UserKnownHostsFile=/etc/pve/priv/known_hosts
  -o StrictHostKeyChecking=no
  -o BatchMode=yes
)

mkdir -p "$(dirname "$LOG")"

# Append a timestamped line to the replication log.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >>"$LOG"
}

# Single-instance guard: fd 9 stays open (lock held) for the lifetime of
# the script. A previous transfer still running is normal for a large
# delta, so skipping is a clean exit, not an error.
exec 9>"$LOCK"
if ! flock -n 9; then
  log "previous run still active, skipping"
  exit 0
fi

NEW_SNAP="${DATASET}@${SNAP_PREFIX}_$(date +%Y%m%d_%H%M%S)"
zfs snapshot "$NEW_SNAP"

# Most recent replication snapshot other than the one just created.
#   -H          : no header row, safe for scripting
#   index()==1  : literal prefix match (no regex metacharacter surprises)
#   grep -Fxv   : drop exactly NEW_SNAP, never a substring match
PREV_SNAP=$(zfs list -H -t snapshot -o name -s creation "$DATASET" 2>/dev/null \
  | awk -v p="${DATASET}@${SNAP_PREFIX}_" 'index($0, p) == 1' \
  | grep -Fxv -- "$NEW_SNAP" \
  | tail -n 1 || true)

if [ -n "$PREV_SNAP" ]; then
  log "Incremental send: $PREV_SNAP -> $NEW_SNAP"
  if ! zfs send -i "$PREV_SNAP" "$NEW_SNAP" | \
      ssh "${SSH_OPTS[@]}" "root@${TARGET_HOST}" "zfs recv -F $DATASET" 2>>"$LOG"; then
    log "ERROR: incremental send failed"
    # Drop the new snapshot so the next run retries cleanly from PREV_SNAP.
    zfs destroy "$NEW_SNAP" 2>/dev/null || true
    exit 1
  fi
else
  log "Full send (no previous snapshot found): $NEW_SNAP"
  if ! zfs send "$NEW_SNAP" | \
      ssh "${SSH_OPTS[@]}" "root@${TARGET_HOST}" "zfs recv -F $DATASET" 2>>"$LOG"; then
    log "ERROR: full send failed"
    zfs destroy "$NEW_SNAP" 2>/dev/null || true
    exit 1
  fi
fi

# Prune old replication snapshots on the source, keeping the newest
# KEEP_SNAPS. 'head -n -N' (drop the last N lines) is GNU coreutils,
# which is fine on Proxmox/Debian.
# TODO(review): received snapshots accumulate on the target forever —
# consider a matching prune over ssh on pvemini.
zfs list -H -t snapshot -o name -s creation "$DATASET" \
  | awk -v p="${DATASET}@${SNAP_PREFIX}_" 'index($0, p) == 1' \
  | head -n -"$KEEP_SNAPS" \
  | xargs -r -n1 zfs destroy 2>>"$LOG" || true

log "Replication completed successfully"