diff --git a/documents/procedures/databasecluster.md b/documents/procedures/databasecluster.md new file mode 100644 index 0000000..1c26165 --- /dev/null +++ b/documents/procedures/databasecluster.md @@ -0,0 +1,87 @@ +# Database Cluster (baboon.sshjunkie.com) + +## Overview +The database cluster consists of two PostgreSQL database servers hosted on `baboon.sshjunkie.com`. These servers are used to store data for services such as Mastodon and AzuraCast. The cluster ensures high availability and fault tolerance through replication and backup strategies. + +## Installation +Install PostgreSQL on both nodes in the cluster: + +```bash +# Update package list and install PostgreSQL +sudo apt update +sudo apt install -y postgresql postgresql-contrib + +# Ensure PostgreSQL is running +sudo systemctl start postgresql +sudo systemctl enable postgresql +``` + +## Configuration +### PostgreSQL Configuration Files: +- **pg_hba.conf**: + - Allow replication and local connections. + - Example: + ```ini + local all postgres md5 + host replication all 192.168.0.0/16 md5 + ``` +- **postgresql.conf**: + - Set `wal_level` for replication: + ```ini + wal_level = hot_standby + max_wal_senders = 3 + ``` + +### Replication Configuration: +- Set up streaming replication between the two nodes (`baboon.sshjunkie.com` as the master and the second node as the replica). + +1. On the master node, enable replication and restart PostgreSQL. +2. On the replica node, set up replication by copying the data directory from the master node and configure the `recovery.conf` file. 
+ +Example `recovery.conf` on the replica: +```ini +standby_mode = on +primary_conninfo = 'host=baboon.sshjunkie.com port=5432 user=replicator password=your_password' +trigger_file = '/tmp/postgresql.trigger.5432' +``` + +## Usage +- **Check the status of PostgreSQL**: + ```bash + sudo systemctl status postgresql + ``` + +- **Promote the replica to master**: + ```bash + pg_ctl promote -D /var/lib/postgresql/data + ``` + +## Backups +Use `pg_basebackup` to create full backups of the cluster. Example: + +```bash +pg_basebackup -h baboon.sshjunkie.com -U replicator -D /backups/db_backup -Ft -z -P +``` + +Automate backups with cronjobs for regular snapshots. + +## Troubleshooting +- **Issue**: Replica is lagging behind. + - **Solution**: Check network connectivity and ensure the replica is able to connect to the master node. Monitor replication lag with: + ```bash + SELECT * FROM pg_stat_replication; + ``` + +## Monitoring +- **Monitor replication status**: + ```bash + SELECT * FROM pg_stat_replication; + ``` + +- **Monitor database health**: + ```bash + pg_isready + ``` + +## Additional Information +- [PostgreSQL Streaming Replication Documentation](https://www.postgresql.org/docs/current/warm-standby.html) diff --git a/miscellaneous/bash/db2_backup.sh b/miscellaneous/bash/db2_backup.sh index cef3263..3e399bb 100755 --- a/miscellaneous/bash/db2_backup.sh +++ b/miscellaneous/bash/db2_backup.sh @@ -1,79 +1,69 @@ #!/bin/bash +# +# Script Name: db2_zfs_backup.sh +# Description: Creates a raw base backup of PostgreSQL on zcluster.technodrome2 using pg_basebackup in directory mode. +# Transfers the backup to The Vaultโ€™s ZFS dataset and snapshots it for long-term retention. 
+# Requirements: pg_basebackup, SSH access, rclone or rsync, ZFS dataset available at destination +# Usage: ./db2_zfs_backup.sh +# Author: Doc @ Genesis Ops +# Date: 2025-05-12 +# -SOURCE_DIR="/tmp/db2_backup/" -DEST_DIR="thevault:/nexus/postgresql/db2/" -DATE=$(date +%Y%m%d%H%M) +### CONFIGURATION ### +SOURCE_SERVER="zcluster.technodrome2.sshjunkie.com" +SOURCE_USER="doc" PG_USER="postgres" -SOURCE_SERVER="zcluster.technodrome2.sshjunkie.com" # Source server (database server) -SOURCE_REMOTE="technodrome2" # rclone remote for source server (configured earlier) -DEST_REMOTE="thevault" # rclone remote for destination server (The Vault) +SOURCE_BASE_DIR="/tmp/db2_backup" # On the remote node +BACKUP_LABEL="$(date +%Y%m%d%H%M)" +REMOTE_BACKUP_DIR="$SOURCE_BASE_DIR/$BACKUP_LABEL" -# Ensure necessary tools are available -command -v pg_basebackup >/dev/null 2>&1 || { echo "pg_basebackup not found, exiting."; exit 1; } -command -v rclone >/dev/null 2>&1 || { echo "rclone not found, exiting."; exit 1; } -command -v ssh >/dev/null 2>&1 || { echo "ssh not found, exiting."; exit 1; } +# Remote source rclone config (optional) +SOURCE_REMOTE="technodrome2" -# Step 1: SSH into the database server and run pg_basebackup -echo "Starting pg_basebackup for db2 on $SOURCE_SERVER..." +# Local destination +DEST_DATASET="vaultpool/postgresql/db2" # Adjust as needed +DEST_MOUNT="/nexus/postgresql/db2" # Must be mountpoint for $DEST_DATASET +FULL_DEST="$DEST_MOUNT/$BACKUP_LABEL" -# Ensure the directory exists on the source server -ssh doc@$SOURCE_SERVER "sudo mkdir -p $SOURCE_DIR$DATE" # Create the directory if it doesn't exist +##################### -# Run pg_basebackup -ssh doc@$SOURCE_SERVER "pg_basebackup -h localhost -D $SOURCE_DIR$DATE -U $PG_USER -Ft -z -P" +echo "๐Ÿš€ Starting ZFS-aware base backup for db2 from $SOURCE_SERVER..." -# Check if pg_basebackup was successful -if [ $? -eq 0 ]; then - echo "pg_basebackup completed successfully. 
Verifying backup directory on $SOURCE_SERVER..." +# Ensure pg_basebackup will run cleanly +ssh $SOURCE_USER@$SOURCE_SERVER "sudo mkdir -p '$REMOTE_BACKUP_DIR' && \ + sudo pg_basebackup -h localhost -D '$REMOTE_BACKUP_DIR' -U $PG_USER -Fp -R -X fetch -P" - # Verify that the backup directory exists - ssh doc@$SOURCE_SERVER "ls -l $SOURCE_DIR$DATE" - - # If the directory exists, proceed with rclone transfer - if [ $? -eq 0 ]; then - echo "Backup directory exists. Proceeding to rclone transfer..." - - # Step 2: Use rclone to copy the backup from the source server to The Vault - retry=0 - max_retries=3 - while ! rclone copy $SOURCE_REMOTE:$SOURCE_DIR$DATE/ $DEST_REMOTE:$DEST_DIR --progress --checksum; do - if [ $retry -ge $max_retries ]; then - echo "Rclone transfer failed after $max_retries attempts." - exit 1 - fi - retry=$((retry+1)) - echo "Retrying rclone transfer... attempt $retry" - sleep 5 - done - - # Check if rclone was successful - if [ $? -eq 0 ]; then - echo "Rclone transfer completed successfully. Proceeding to snapshot..." - - # Step 3: Create a ZFS snapshot on The Vault - ssh root@thevault.sshjunkie.com "sudo zfs snapshot nexus/postgresql/db2@$DATE" - - # Verify snapshot creation on The Vault - ssh root@thevault.sshjunkie.com "zfs list -t snapshot | grep nexus/postgresql/db2@$DATE" >/dev/null 2>&1 - if [ $? -eq 0 ]; then - echo "ZFS snapshot nexus/postgresql/db2@$DATE created successfully on The Vault." - else - echo "Snapshot creation failed on The Vault." - exit 1 - fi - else - echo "Error during rclone transfer. Backup not transferred." - exit 1 - fi - else - echo "Backup directory not found on $SOURCE_SERVER. Aborting transfer." - exit 1 - fi -else - echo "Error during pg_basebackup. Backup not created." +if [[ $? -ne 0 ]]; then + echo "โŒ pg_basebackup failed on $SOURCE_SERVER." exit 1 fi -# Clean up after success -ssh doc@$SOURCE_SERVER "sudo rm -rf $SOURCE_DIR$DATE" -echo "Backup and snapshot process completed successfully." 
+echo "๐Ÿ“ฆ Backup created on $SOURCE_SERVER at $REMOTE_BACKUP_DIR" + +# Pull the backup using rsync (preserves structure + timestamps) +echo "๐Ÿ”„ Syncing backup to The Vault at $FULL_DEST..." +mkdir -p "$FULL_DEST" +rsync -avz --progress $SOURCE_USER@$SOURCE_SERVER:"$REMOTE_BACKUP_DIR/" "$FULL_DEST/" + +if [[ $? -ne 0 ]]; then + echo "โŒ rsync transfer failed!" + exit 1 +fi + +# Snapshot the full ZFS backup dataset +SNAPSHOT_NAME="${DEST_DATASET}@${BACKUP_LABEL}" +echo "๐Ÿ“ธ Creating ZFS snapshot: $SNAPSHOT_NAME" +zfs snapshot "$SNAPSHOT_NAME" + +if [[ $? -eq 0 ]]; then + echo "โœ… Snapshot $SNAPSHOT_NAME created successfully." +else + echo "โŒ Snapshot creation failed." + exit 1 +fi + +# Optional: Clean up the remote backup dir +echo "๐Ÿงน Cleaning up temporary backup on $SOURCE_SERVER..." +ssh $SOURCE_USER@$SOURCE_SERVER "sudo rm -rf '$REMOTE_BACKUP_DIR'" + +echo "๐ŸŽ‰ Backup and ZFS snapshot complete. Stored in $FULL_DEST" diff --git a/miscellaneous/bash/pull_health_everywhere_ntp.sh b/miscellaneous/bash/pull_health_everywhere_ntp.sh new file mode 100755 index 0000000..3ae6ebd --- /dev/null +++ b/miscellaneous/bash/pull_health_everywhere_ntp.sh @@ -0,0 +1,101 @@ +#!/bin/bash + +# === CONFIG === +REMOTE_USER="doc" +BOT_TOKEN="8178867489:AAH0VjN7VnZSCIWasSz_y97iBLLjPJA751k" +CHAT_ID="1559582356" +TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S') +LOGFILE="$HOME/krang-logs/health-$(date '+%Y%m%d-%H%M').log" + +# Thresholds +SWAP_LIMIT_MB=512 +LOAD_LIMIT=4.0 + +mkdir -p "$HOME/krang-logs" + +SERVERS=( + thevault.sshjunkie.com + zcluster.technodrome1.sshjunkie.com + zcluster.technodrome2.sshjunkie.com + shredder.sshjunkie.com + chatwithus.live +) + +SUMMARY="๐Ÿ“ก Krang System Health Report - $TIMESTAMP + +" + +for HOST in "${SERVERS[@]}"; do + echo "๐Ÿ” Collecting from $HOST..." 
+
+  DATA=$(ssh "$REMOTE_USER@$HOST" bash -s << 'EOF'
+HOST=$(hostname)
+MEM=$(free -h | awk '/Mem:/ {print $4 " free"}')
+SWAP_RAW=$(free -m | awk '/Swap:/ {print $3}')
+SWAP="$SWAP_RAW Mi used"
+DISK=$(df -h / | awk 'NR==2 {print $4 " free"}')
+LOAD=$(uptime | awk -F'load average:' '{print $2}' | cut -d, -f1 | xargs)
+APACHE=$(systemctl is-active apache2 2>/dev/null || systemctl is-active httpd 2>/dev/null)
+[ "$APACHE" = "active" ] && APACHE_STATUS="✅ Apache running" || APACHE_STATUS="❌ Apache not running"
+
+echo "$HOST|$MEM|$SWAP_RAW|$SWAP|$DISK|$LOAD|$APACHE_STATUS"
+EOF
+)
+
+  IFS='|' read -r H MEM SWAP_MB SWAP_HUMAN DISK LOAD1 APACHE_STATUS <<< "$DATA"
+
+  ALERTS=""
+  if (( SWAP_MB > SWAP_LIMIT_MB )); then
+    ALERTS+="⚠️ HIGH SWAP ($SWAP_HUMAN)
+"
+  fi
+
+  LOAD_INT=$(awk "BEGIN {print ($LOAD1 > $LOAD_LIMIT) ? 1 : 0}")
+  if [ "$LOAD_INT" -eq 1 ]; then
+    ALERTS+="⚠️ HIGH LOAD ($LOAD1)
+"
+  fi
+
+  ALERTS_MSG=""
+  [ -n "$ALERTS" ] && ALERTS_MSG="🚨 ALERTS:
+$ALERTS"
+
+  SUMMARY+="🖥️ $H
+• Mem: $MEM
+• Swap: $SWAP_HUMAN
+• Disk: $DISK
+• Load: $LOAD1
+• $APACHE_STATUS
+$ALERTS_MSG
+
+"
+done
+
+# === KRANG CLOCK ACCURACY CHECK ===
+NTP_RESULT=$(ntpdate -q time.google.com 2>&1)
+OFFSET=$(echo "$NTP_RESULT" | awk '/offset/ {print $10}')
+OFFSET_MS=$(awk -v off="$OFFSET" 'BEGIN {printf "%.0f", off * 1000}')
+
+if [[ -n "$OFFSET_MS" ]]; then
+  if (( OFFSET_MS > 500 || OFFSET_MS < -500 )); then
+    # Auto-correct the system clock
+    CORRECTION=$(ntpdate -u time.google.com 2>&1)
+    SUMMARY+="🛠️ Auto-corrected Krang clock via ntpdate: $CORRECTION
+"
+
+    SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ⚠️ OUT OF SYNC
+"
+  else
+    SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ✅ SYNCHRONIZED
+"
+  fi
+else
+  SUMMARY+="🕰️ Krang Clock Check: ❌ FAILED to retrieve offset.
+" +fi + +# Log to file +echo -e "$SUMMARY" > "$LOGFILE" + +# Send to Telegram +curl -s -X POST https://api.telegram.org/bot$BOT_TOKEN/sendMessage -d chat_id="$CHAT_ID" -d text="$SUMMARY" diff --git a/miscellaneous/bash/rsync_zfs_sync_helper.sh b/miscellaneous/bash/rsync_zfs_sync_helper.sh old mode 100644 new mode 100755 diff --git a/recordtheshow/show_schedule.json b/recordtheshow/show_schedule.json index 21daa6e..99f716e 100755 --- a/recordtheshow/show_schedule.json +++ b/recordtheshow/show_schedule.json @@ -60,7 +60,7 @@ ] }, "chaos": { - "recording": true, + "recording": false, "duration": 10800, "schedule": [ {