diff --git a/system_instructions/oracle/deploy-oracle-xe-in-docker-lxc.sh b/oracle/deploy-oracle-xe-in-docker-lxc.sh similarity index 100% rename from system_instructions/oracle/deploy-oracle-xe-in-docker-lxc.sh rename to oracle/deploy-oracle-xe-in-docker-lxc.sh diff --git a/system_instructions/oracle/oracle-migration-plan-10g-to-21c.md b/oracle/oracle-migration-plan-10g-to-21c.md similarity index 100% rename from system_instructions/oracle/oracle-migration-plan-10g-to-21c.md rename to oracle/oracle-migration-plan-10g-to-21c.md diff --git a/system_instructions/oracle/oracle-xe-complete-setup.sh b/oracle/oracle-xe-complete-setup.sh similarity index 100% rename from system_instructions/oracle/oracle-xe-complete-setup.sh rename to oracle/oracle-xe-complete-setup.sh diff --git a/system_instructions/oracle/oracle-xe-lxc-install-guide.md b/oracle/oracle-xe-lxc-install-guide.md similarity index 100% rename from system_instructions/oracle/oracle-xe-lxc-install-guide.md rename to oracle/oracle-xe-lxc-install-guide.md diff --git a/proxmox/ha-monitor.sh b/proxmox/ha-monitor.sh new file mode 100644 index 0000000..459e71d --- /dev/null +++ b/proxmox/ha-monitor.sh @@ -0,0 +1,360 @@ +#!/bin/bash + +# HA Monitor cu PVE::Notify - versiune finală +# Folosește sistemul nativ Proxmox cu template-uri personalizate +# +# TEMPLATE SYSTEM: +# ================ +# Acest script folosește template-uri Handlebars pentru formatarea emailurilor, +# exact ca sistemul de backup Proxmox. 
Template-urile trebuie create în: +# +# /etc/pve/notification-templates/default/ +# ├── ha-status-subject.txt.hbs (subject-ul emailului) +# ├── ha-status-body.txt.hbs (conținutul text al emailului) +# └── ha-status-body.html.hbs (opțional, versiunea HTML) +# +# Template-urile folosesc sintaxa Handlebars cu variabilele: +# - {{ hostname }} : FQDN-ul serverului +# - {{ status }} : "SUCCESSFUL" sau "FAILED" +# - {{ runtime }} : timpul de execuție în secunde +# - {{ details }} : detaliile verificării HA +# +# Pentru alte scripturi care vor să folosească sistemul de notificări Proxmox: +# 1. Creați template-urile în /etc/pve/notification-templates/default/ +# 2. Folosiți PVE::Notify::notify($severity, $template_name, $template_data, $fields) +# 3. $template_name trebuie să corespundă cu numele fișierelor template +# +# PREREQUISITE: +# ============= +# Template-urile sunt create automat de script la prima rulare + +HOSTNAME=$(hostname) +FQDN=$(hostname -f) +DATE=$(date '+%Y-%m-%d %H:%M:%S') +START_TIME=$(date +%s) + +# Funcție pentru crearea template-urilor de notificare +create_templates() { + local template_dir="/etc/pve/notification-templates/default" + + # Creează directorul dacă nu există + mkdir -p "$template_dir" + + echo "Creating notification templates in $template_dir..." 
+ + # Template pentru subject - pentru SUCCESS + cat > "$template_dir/ha-status-subject.txt.hbs" << 'EOF' +{{#if (eq status "SUCCESSFUL")}}✅ HA CLUSTER OK - {{ hostname }}{{else}}🚨 HA CLUSTER ISSUES - {{ hostname }}{{/if}} +EOF + + # Template pentru body text + cat > "$template_dir/ha-status-body.txt.hbs" << 'EOF' +{{#if (eq status "SUCCESSFUL")}}✅ HIGH AVAILABILITY STATUS: ALL SYSTEMS OK{{else}}🚨 HIGH AVAILABILITY CLUSTER HAS ISSUES{{/if}} + +Host: {{ hostname }} +Check duration: {{ runtime }}s + +CLUSTER STATUS: +{{ details }} + +{{#if (eq status "FAILED")}} +=== HOW TO READ pvecm status OUTPUT === + +Your current problematic output shows: +- Total votes: 2 (WRONG - should be 3) +- Qdevice (votes 0) (WRONG - should be votes 1) + +After fix should show: +- Total votes: 3 (CORRECT) +- Qdevice (votes 1) (CORRECT) + +=== STEP-BY-STEP FIX === + +Step 1 - Fix Qdevice (PRIORITY): + systemctl restart corosync-qdevice + sleep 5 + corosync-qdevice-tool -s + +Step 2 - Verify cluster status: + pvecm status + LOOK FOR: Total votes: 3 (not 2!) and Qdevice (votes 1) + +Step 3 - Test HA functionality: + ha-manager status + +=== WHAT THIS MEANS === +QDEVICE DISCONNECTED: No tie-breaker vote +- If one node fails, cluster may lose quorum +- VMs won't automatically migrate + +The cluster works now but has no tie-breaker vote. +One node failure = no quorum = VMs can't migrate. +{{else}} +All HA components are functioning normally. 
+- Cluster has proper quorum with qdevice participation +- Automatic VM migration is available +- System is fully redundant +{{/if}} + +=== MANUAL SCRIPT EXECUTION === + +To run this HA status check manually: + +Basic check: + /opt/scripts/ha-monitor.sh + +Verbose output (shows details on console): + /opt/scripts/ha-monitor.sh -v + +Recreate email templates: + /opt/scripts/ha-monitor.sh --create-templates + +Script location: /opt/scripts/ha-monitor.sh +Log file: /var/log/pve-ha-monitor.log + +Total check time: {{ runtime }}s +EOF + + # Template pentru body HTML cu font mai mare și consistent + cat > "$template_dir/ha-status-body.html.hbs" << 'EOF' +
+ +{{#if (eq status "SUCCESSFUL")}} +

✅ HIGH AVAILABILITY STATUS: ALL SYSTEMS OK

+{{else}} +

🚨 HIGH AVAILABILITY CLUSTER HAS ISSUES

+{{/if}} + +

Host: {{ hostname }}
+Check duration: {{ runtime }}s

+ +

CLUSTER STATUS

+
{{ details }}
+ +{{#if (eq status "FAILED")}} +

HOW TO READ pvecm status OUTPUT

+

Your current problematic output shows:

+ + +

After fix should show:

+ + +

STEP-BY-STEP FIX

+ +

Step 1 - Fix Qdevice:

+
+
systemctl restart corosync-qdevice
+
sleep 5
+
corosync-qdevice-tool -s
+
+ +

Step 2 - Verify status:

+
+
pvecm status
+
+

LOOK FOR: Total votes: 3 (not 2!) and Qdevice (votes 1)

+ +

Bottom line: The cluster works now but has no tie-breaker vote.
+One node failure = no quorum = VMs can't migrate.

+ +{{else}} +

All HA components are functioning normally:

+ +{{/if}} + +

MANUAL SCRIPT EXECUTION

+

To run this HA status check manually:

+ +

Basic check:

+
+/opt/scripts/ha-monitor.sh +
+ +

Verbose output:

+
+/opt/scripts/ha-monitor.sh -v +
+ +

Recreate templates:

+
+/opt/scripts/ha-monitor.sh --create-templates +
+ +

Script location: /opt/scripts/ha-monitor.sh
+Log file: /var/log/pve-ha-monitor.log

+ +

Total check time: {{ runtime }}s

+ +
#######################################
# Probe the Proxmox HA stack and print a status report.
#
# Checks, in order:
#   1. the pve-ha-lrm and pve-ha-crm services are active,
#   2. corosync reports "Quorate: Yes" and total votes equal expected votes
#      (noting whether the Qdevice row of `pvecm status` carries one vote),
#   3. corosync-qdevice-tool reports a "Connected" state,
#   4. enough member rows in `pvecm status` carry the "A,V,NMW" flags.
#
# Arguments:
#   $1 - minimum number of online nodes required (optional, default 2)
# Outputs:
#   Line 1 on stdout is "SUCCESSFUL" or "FAILED"; all following lines are
#   the human-readable details, so a caller can split them with
#   `head -n 1` / `tail -n +2`.
# Returns:
#   Exit status of the final printf (0 in practice); the verdict is carried
#   by the first output line, not by the return code.
#######################################
check_ha_status() {
  local min_nodes=${1:-2}
  # Real newline used to build the report. The text is printed verbatim with
  # printf '%s' instead of `echo -e`, so backslashes coming from tool output
  # (e.g. the QNetd host field) are never interpreted as escape sequences.
  local nl=$'\n'
  local status_ok=true
  local details=""
  # Everything below is function-private; nothing leaks into the caller.
  local quorum_info pvecm_info qdevice_status
  local expected_votes total_votes qdevice_votes qnetd_host nodes_online

  # --- HA services ---------------------------------------------------------
  if systemctl is-active --quiet pve-ha-lrm \
    && systemctl is-active --quiet pve-ha-crm; then
    details+="HA Services: OK${nl}"
  else
    details+="HA Services: ERROR - Services not running${nl}"
    details+=" Recovery: systemctl restart pve-ha-lrm pve-ha-crm${nl}"
    status_ok=false
  fi

  # --- Quorum and qdevice vote ---------------------------------------------
  # stderr is silenced on purpose: a missing or failing tool yields empty
  # output, which falls through to the "not quorate" branch below.
  quorum_info=$(corosync-quorumtool -s 2>/dev/null)
  pvecm_info=$(pvecm status 2>/dev/null)

  if grep -q "Quorate:.*Yes" <<<"$quorum_info"; then
    expected_votes=$(awk '/Expected votes:/ {print $3}' <<<"$quorum_info")
    total_votes=$(awk '/Total votes:/ {print $3}' <<<"$quorum_info")

    # The Qdevice row is matched only when its votes column is exactly 1,
    # so qdevice_votes is either "1" or empty.
    qdevice_votes=$(grep -E \
      "^[[:space:]]*0x00000000[[:space:]]+1[[:space:]]+Qdevice" \
      <<<"$pvecm_info" | awk '{print $2}')

    if [[ "$total_votes" == "$expected_votes" && "$qdevice_votes" == "1" ]]; then
      details+="Quorum: OK ($total_votes/$expected_votes votes, Qdevice participating)${nl}"
    elif [[ "$total_votes" == "$expected_votes" ]]; then
      details+="Quorum: OK ($total_votes/$expected_votes votes)${nl}"
    else
      details+="Quorum: WARNING ($total_votes/$expected_votes votes)${nl}"
      details+=" Check: pvecm status for qdevice participation${nl}"
      status_ok=false
    fi
  else
    details+="Quorum: ERROR - Cluster not quorate${nl}"
    details+=" Check: pvecm status && corosync-quorumtool -s${nl}"
    status_ok=false
  fi

  # --- Qdevice connectivity ------------------------------------------------
  qdevice_status=$(corosync-qdevice-tool -s 2>/dev/null)
  if grep -q "State:.*Connected" <<<"$qdevice_status"; then
    qnetd_host=$(awk '/QNetd host:/ {print $3}' <<<"$qdevice_status")
    details+="Qdevice Connection: OK ($qnetd_host)${nl}"
  else
    details+="Qdevice Connection: WARNING - Disconnected${nl}"
    details+=" Recovery: systemctl restart corosync-qdevice${nl}"
    status_ok=false
  fi

  # --- Cluster members -----------------------------------------------------
  # Counts rows flagged "A,V,NMW" in `pvecm status`.
  # NOTE(review): presumably rows lose this flag set when the qdevice is not
  # registered, which would also trip this check - confirm on a live cluster.
  nodes_online=$(grep -c "A,V,NMW" <<<"$pvecm_info")

  if [[ "$nodes_online" -ge "$min_nodes" ]]; then
    details+="Cluster Nodes: OK ($nodes_online nodes online)${nl}"
  else
    details+="Cluster Nodes: ERROR - Only $nodes_online nodes online${nl}"
    details+=" Check: pvecm nodes && ping [offline-node-ip]${nl}"
    status_ok=false
  fi

  # Recovery checklist is appended only when at least one check failed.
  if [[ "$status_ok" != true ]]; then
    details+="${nl}=== IMMEDIATE ACTIONS REQUIRED ===${nl}"
    details+="1. SSH to cluster: ssh root@$(hostname -f)${nl}"
    details+="2. Check overall status: pvecm status${nl}"
    details+="3. Review HA logs: journalctl -u pve-ha-lrm -u pve-ha-crm -n 20${nl}"
    details+="4. Check network connectivity between nodes${nl}"
  fi

  # Verdict first, details after.
  if [[ "$status_ok" == true ]]; then
    printf '%s\n' "SUCCESSFUL"
  else
    printf '%s\n' "FAILED"
  fi
  printf '%s\n' "$details"
}
#######################################
# Report the state of a Proxmox VM as a "STATUS:DETAIL" token on stdout.
#
# The state is read from `qm status <id>`. When qm says "running", the
# process table is additionally searched for a kvm/qemu command line that
# carries exactly `-id <id>`, to catch a VM that is marked running but whose
# process is gone.
#
# Globals:
#   VM_ID (read) - VM id used when no argument is given
# Arguments:
#   $1 - VM id to check (optional, defaults to $VM_ID)
# Outputs:
#   One of: RUNNING:OK, STOPPED:VM_STOPPED, ERROR:VM_NOT_FOUND,
#   ERROR:PROCESS_MISSING, ERROR:INTERNAL_ERROR,
#   ERROR:UNKNOWN_STATE:<state>
# Returns:
#   0 only for RUNNING:OK, 1 for every other outcome.
#######################################
check_vm_status() {
  local vm_id=${1:-${VM_ID:-}}
  # Declared separately from the assignments so `local` does not mask the
  # exit status of the command substitutions.
  local vm_status proc_list

  # Keep whatever follows the first "status: " on the line. qm errors are
  # silenced on purpose: no output at all is reported as VM_NOT_FOUND.
  vm_status=$(qm status "$vm_id" 2>/dev/null \
    | awk 'i = index($0, "status: ") { print substr($0, i + 8) }')

  if [[ -z "$vm_status" ]]; then
    echo "ERROR:VM_NOT_FOUND"
    return 1
  fi

  case "$vm_status" in
    running)
      # Snapshot the process list first and match it afterwards, so the
      # matcher can never see its own command line (no `grep -v grep`).
      proc_list=$(ps -eo args= 2>/dev/null)
      # The id must be followed by whitespace or end of line; a bare prefix
      # match would let "-id 1070" pass for VM 107.
      if grep -Eq -- "(kvm|qemu).*-id ${vm_id}([[:space:]]|\$)" \
        <<<"$proc_list"; then
        echo "RUNNING:OK"
        return 0
      fi
      echo "ERROR:PROCESS_MISSING"
      return 1
      ;;
    stopped)
      echo "STOPPED:VM_STOPPED"
      return 1
      ;;
    internal-error)
      echo "ERROR:INTERNAL_ERROR"
      return 1
      ;;
    *)
      echo "ERROR:UNKNOWN_STATE:$vm_status"
      return 1
      ;;
  esac
}