Auto-commit from giteapush.sh at 2025-05-14 07:12:51
This commit is contained in:
parent
e4f1e3396c
commit
0739d35b2f
87
documents/procedures/databasecluster.md
Normal file
87
documents/procedures/databasecluster.md
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
# Database Cluster (baboon.sshjunkie.com)
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
The database cluster consists of two PostgreSQL database servers hosted on `baboon.sshjunkie.com`. These servers are used to store data for services such as Mastodon and AzuraCast. The cluster ensures high availability and fault tolerance through replication and backup strategies.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
Install PostgreSQL on both nodes in the cluster:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Update package list and install PostgreSQL
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y postgresql postgresql-contrib
|
||||||
|
|
||||||
|
# Ensure PostgreSQL is running
|
||||||
|
sudo systemctl start postgresql
|
||||||
|
sudo systemctl enable postgresql
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
### PostgreSQL Configuration Files:
|
||||||
|
- **pg_hba.conf**:
|
||||||
|
- Allow replication and local connections.
|
||||||
|
- Example:
|
||||||
|
```ini
|
||||||
|
local all postgres md5
|
||||||
|
host replication all 192.168.0.0/16 md5
|
||||||
|
```
|
||||||
|
- **postgresql.conf**:
|
||||||
|
- Set `wal_level` for replication:
|
||||||
|
```ini
|
||||||
|
wal_level = replica   # "hot_standby" is a deprecated alias for "replica" since PostgreSQL 9.6
|
||||||
|
max_wal_senders = 3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Replication Configuration:
|
||||||
|
- Set up streaming replication between the two nodes (`baboon.sshjunkie.com` as the master and the second node as the replica).
|
||||||
|
|
||||||
|
1. On the master node, enable replication and restart PostgreSQL.
|
||||||
|
2. On the replica node, copy the data directory from the master node (e.g. with `pg_basebackup`) and configure the `recovery.conf` file (PostgreSQL 11 and earlier; on PostgreSQL 12+ `recovery.conf` was removed — create a `standby.signal` file and put the connection settings in `postgresql.auto.conf` instead).
|
||||||
|
|
||||||
|
Example `recovery.conf` on the replica:
|
||||||
|
```ini
|
||||||
|
standby_mode = on
|
||||||
|
primary_conninfo = 'host=baboon.sshjunkie.com port=5432 user=replicator password=your_password'
|
||||||
|
trigger_file = '/tmp/postgresql.trigger.5432'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
- **Check the status of PostgreSQL**:
|
||||||
|
```bash
|
||||||
|
sudo systemctl status postgresql
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Promote the replica to master**:
|
||||||
|
```bash
|
||||||
|
pg_ctl promote -D /var/lib/postgresql/data
|
||||||
|
```
|
||||||
|
|
||||||
|
## Backups
|
||||||
|
Use `pg_basebackup` to create full backups of the cluster. Example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pg_basebackup -h baboon.sshjunkie.com -U replicator -D /backups/db_backup -Ft -z -P
|
||||||
|
```
|
||||||
|
|
||||||
|
Automate backups with cronjobs for regular snapshots.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
- **Issue**: Replica is lagging behind.
|
||||||
|
- **Solution**: Check network connectivity and ensure the replica is able to connect to the master node. Monitor replication lag with:
|
||||||
|
```bash
|
||||||
|
SELECT * FROM pg_stat_replication;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring
|
||||||
|
- **Monitor replication status**:
|
||||||
|
```bash
|
||||||
|
SELECT * FROM pg_stat_replication;
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Monitor database health**:
|
||||||
|
```bash
|
||||||
|
pg_isready
|
||||||
|
```
|
||||||
|
|
||||||
|
## Additional Information
|
||||||
|
- [PostgreSQL Streaming Replication Documentation](https://www.postgresql.org/docs/current/warm-standby.html)
|
@ -1,79 +1,69 @@
|
|||||||
#!/bin/bash
#
# Script Name:  db2_zfs_backup.sh
# Description:  Creates a raw base backup of PostgreSQL on zcluster.technodrome2
#               using pg_basebackup in plain (directory) mode, pulls it to The
#               Vault's ZFS dataset with rsync, then snapshots the dataset for
#               long-term retention.
# Requirements: pg_basebackup (on the source), SSH access, rsync,
#               ZFS dataset available at the destination
# Usage:        ./db2_zfs_backup.sh
# Author:       Doc @ Genesis Ops
# Date:         2025-05-12
#

### CONFIGURATION ###
SOURCE_SERVER="zcluster.technodrome2.sshjunkie.com"   # Source server (database server)
SOURCE_USER="doc"
PG_USER="postgres"
SOURCE_BASE_DIR="/tmp/db2_backup"                     # Staging dir on the remote node
BACKUP_LABEL="$(date +%Y%m%d%H%M)"                    # Timestamp label, e.g. 202505141200
REMOTE_BACKUP_DIR="$SOURCE_BASE_DIR/$BACKUP_LABEL"

# Remote source rclone config (optional, kept for compatibility with older runs)
SOURCE_REMOTE="technodrome2"

# Local destination
DEST_DATASET="vaultpool/postgresql/db2"               # ZFS dataset to snapshot; adjust as needed
DEST_MOUNT="/nexus/postgresql/db2"                    # Must be the mountpoint for $DEST_DATASET
FULL_DEST="$DEST_MOUNT/$BACKUP_LABEL"
#####################

echo "🚀 Starting ZFS-aware base backup for db2 from $SOURCE_SERVER..."

# Step 1: create the staging dir and run pg_basebackup on the source server.
# -Fp = plain directory layout, -R = write standby/recovery settings,
# -X fetch = include the WAL needed to make the backup consistent.
if ! ssh "$SOURCE_USER@$SOURCE_SERVER" "sudo mkdir -p '$REMOTE_BACKUP_DIR' && \
    sudo pg_basebackup -h localhost -D '$REMOTE_BACKUP_DIR' -U $PG_USER -Fp -R -X fetch -P"; then
  echo "❌ pg_basebackup failed on $SOURCE_SERVER."
  exit 1
fi

echo "📦 Backup created on $SOURCE_SERVER at $REMOTE_BACKUP_DIR"

# Step 2: pull the backup using rsync (preserves structure + timestamps).
echo "🔄 Syncing backup to The Vault at $FULL_DEST..."
mkdir -p "$FULL_DEST"
if ! rsync -avz --progress "$SOURCE_USER@$SOURCE_SERVER:$REMOTE_BACKUP_DIR/" "$FULL_DEST/"; then
  echo "❌ rsync transfer failed!"
  exit 1
fi

# Step 3: snapshot the full ZFS backup dataset so this backup is retained
# even after later syncs overwrite the mountpoint contents.
SNAPSHOT_NAME="${DEST_DATASET}@${BACKUP_LABEL}"
echo "📸 Creating ZFS snapshot: $SNAPSHOT_NAME"
if zfs snapshot "$SNAPSHOT_NAME"; then
  echo "✅ Snapshot $SNAPSHOT_NAME created successfully."
else
  echo "❌ Snapshot creation failed."
  exit 1
fi

# Step 4 (optional): clean up the staging copy on the source server.
echo "🧹 Cleaning up temporary backup on $SOURCE_SERVER..."
ssh "$SOURCE_USER@$SOURCE_SERVER" "sudo rm -rf '$REMOTE_BACKUP_DIR'"

echo "🎉 Backup and ZFS snapshot complete. Stored in $FULL_DEST"
101
miscellaneous/bash/pull_health_everywhere_ntp.sh
Executable file
101
miscellaneous/bash/pull_health_everywhere_ntp.sh
Executable file
@ -0,0 +1,101 @@
|
|||||||
|
#!/bin/bash
#
# pull_health_everywhere_ntp.sh
# Collects memory/swap/disk/load/Apache health from a fleet of servers over
# SSH, checks Krang's own clock against NTP (auto-correcting large drift),
# writes the report to a log file, and pushes it to Telegram.

# === CONFIG ===
REMOTE_USER="doc"
# SECURITY NOTE(review): bot token and chat id are hardcoded and were committed
# to the repo — rotate the token and load these from a root-readable config
# file or environment variables instead.
BOT_TOKEN="8178867489:AAH0VjN7VnZSCIWasSz_y97iBLLjPJA751k"
CHAT_ID="1559582356"
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
LOGFILE="$HOME/krang-logs/health-$(date '+%Y%m%d-%H%M').log"

# Alert thresholds
SWAP_LIMIT_MB=512
LOAD_LIMIT=4.0

mkdir -p "$HOME/krang-logs"

SERVERS=(
  thevault.sshjunkie.com
  zcluster.technodrome1.sshjunkie.com
  zcluster.technodrome2.sshjunkie.com
  shredder.sshjunkie.com
  chatwithus.live
)

SUMMARY="📡 Krang System Health Report - $TIMESTAMP

"

for HOST in "${SERVERS[@]}"; do
  echo "🔍 Collecting from $HOST..."

  # Run the probe remotely; the quoted heredoc delimiter prevents local
  # expansion. The probe emits one pipe-delimited status line.
  DATA=$(ssh "$REMOTE_USER@$HOST" bash -s << 'EOF'
HOST=$(hostname)
MEM=$(free -h | awk '/Mem:/ {print $4 " free"}')
SWAP_RAW=$(free -m | awk '/Swap:/ {print $3}')
SWAP="$SWAP_RAW Mi used"
DISK=$(df -h / | awk 'NR==2 {print $4 " free"}')
LOAD=$(uptime | awk -F'load average:' '{print $2}' | cut -d, -f1 | xargs)
APACHE=$(systemctl is-active apache2 2>/dev/null || systemctl is-active httpd 2>/dev/null)
[ "$APACHE" = "active" ] && APACHE_STATUS="✅ Apache running" || APACHE_STATUS="❌ Apache not running"

echo "$HOST|$MEM|$SWAP_RAW|$SWAP|$DISK|$LOAD|$APACHE_STATUS"
EOF
  )

  IFS='|' read -r H MEM SWAP_MB SWAP_HUMAN DISK LOAD1 APACHE_STATUS <<< "$DATA"

  ALERTS=""
  # Guard: SWAP_MB can be empty/non-numeric if the ssh probe failed, which
  # would make the arithmetic test error out.
  if [[ "$SWAP_MB" =~ ^[0-9]+$ ]] && (( SWAP_MB > SWAP_LIMIT_MB )); then
    ALERTS+="⚠️ HIGH SWAP ($SWAP_HUMAN)
"
  fi

  # Float comparison via awk (bash (( )) is integer-only). Pass values with
  # -v instead of interpolating them into the awk program text.
  LOAD_INT=$(awk -v l="$LOAD1" -v lim="$LOAD_LIMIT" 'BEGIN {print (l > lim) ? 1 : 0}')
  if [ "$LOAD_INT" -eq 1 ]; then
    ALERTS+="⚠️ HIGH LOAD ($LOAD1)
"
  fi

  ALERTS_MSG=""
  [ -n "$ALERTS" ] && ALERTS_MSG="🚨 ALERTS:
$ALERTS"

  SUMMARY+="🖥️ $H
• Mem: $MEM
• Swap: $SWAP_HUMAN
• Disk: $DISK
• Load: $LOAD1
• $APACHE_STATUS
$ALERTS_MSG

"
done

# === KRANG CLOCK ACCURACY CHECK ===
NTP_RESULT=$(ntpdate -q time.google.com 2>&1)
OFFSET=$(echo "$NTP_RESULT" | awk '/offset/ {print $10}')
# FIX: the original nested unescaped double quotes inside a double-quoted awk
# program (awk "BEGIN {printf "%.0f", ..."}), which split the program string
# and broke the command. Pass the shell value in with -v instead.
OFFSET_MS=$(awk -v off="$OFFSET" 'BEGIN {printf "%.0f", off * 1000}')

if [[ -n "$OFFSET_MS" ]]; then
  if (( OFFSET_MS > 500 || OFFSET_MS < -500 )); then
    # Drift beyond ±500 ms: step the clock, then report both the correction
    # and the measured drift.
    CORRECTION=$(ntpdate -u time.google.com 2>&1)
    SUMMARY+="🛠️ Auto-corrected Krang clock via ntpdate: $CORRECTION
"
    SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ⚠️ OUT OF SYNC
"
  else
    SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ✅ SYNCHRONIZED
"
  fi
else
  SUMMARY+="🕰️ Krang Clock Check: ❌ FAILED to retrieve offset.
"
fi

# Log to file
echo -e "$SUMMARY" > "$LOGFILE"

# Send to Telegram
curl -s -X POST "https://api.telegram.org/bot$BOT_TOKEN/sendMessage" -d chat_id="$CHAT_ID" -d text="$SUMMARY"
0
miscellaneous/bash/rsync_zfs_sync_helper.sh
Normal file → Executable file
0
miscellaneous/bash/rsync_zfs_sync_helper.sh
Normal file → Executable file
@ -60,7 +60,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"chaos": {
|
"chaos": {
|
||||||
"recording": true,
|
"recording": false,
|
||||||
"duration": 10800,
|
"duration": 10800,
|
||||||
"schedule": [
|
"schedule": [
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user