Auto-commit from giteapush.sh at 2025-05-14 07:12:51

This commit is contained in:
DocTator 2025-05-14 07:12:51 -04:00
parent e4f1e3396c
commit 0739d35b2f
5 changed files with 246 additions and 68 deletions

View File

@ -0,0 +1,87 @@
# Database Cluster (baboon.sshjunkie.com)
## Overview
The database cluster consists of two PostgreSQL database servers, with `baboon.sshjunkie.com` acting as the primary (master) node. These servers store data for services such as Mastodon and AzuraCast. The cluster provides high availability and fault tolerance through streaming replication and regular backups.
## Installation
Install PostgreSQL on both nodes in the cluster:
```bash
# Update package list and install PostgreSQL
sudo apt update
sudo apt install -y postgresql postgresql-contrib
# Ensure PostgreSQL is running
sudo systemctl start postgresql
sudo systemctl enable postgresql
```
## Configuration
### PostgreSQL Configuration Files:
- **pg_hba.conf**:
- Allow replication and local connections.
- Example:
```ini
local all postgres md5
host replication all 192.168.0.0/16 md5
```
- **postgresql.conf**:
- Set `wal_level` for replication (on PostgreSQL 9.6+ the value is `replica`; `hot_standby` is the deprecated pre-9.6 spelling of the same setting):
```ini
wal_level = replica
max_wal_senders = 3
```
### Replication Configuration:
- Set up streaming replication between the two nodes (`baboon.sshjunkie.com` as the master and the second node as the replica).
1. On the master node, enable replication and restart PostgreSQL.
2. On the replica node, set up replication by copying the data directory from the master node (e.g. with `pg_basebackup`) and configuring standby settings. On PostgreSQL 11 and earlier these go in `recovery.conf`; on PostgreSQL 12+ `recovery.conf` has been removed — put `primary_conninfo` in `postgresql.conf` (or `postgresql.auto.conf`) and create an empty `standby.signal` file in the data directory instead.
Example `recovery.conf` on the replica (PostgreSQL 11 and earlier):
```ini
standby_mode = on
primary_conninfo = 'host=baboon.sshjunkie.com port=5432 user=replicator password=your_password'
trigger_file = '/tmp/postgresql.trigger.5432'
```
## Usage
- **Check the status of PostgreSQL**:
```bash
sudo systemctl status postgresql
```
- **Promote the replica to master**:
```bash
pg_ctl promote -D /var/lib/postgresql/data
```
## Backups
Use `pg_basebackup` to create full backups of the cluster. Example:
```bash
pg_basebackup -h baboon.sshjunkie.com -U replicator -D /backups/db_backup -Ft -z -P
```
Automate backups with cronjobs for regular snapshots.
## Troubleshooting
- **Issue**: Replica is lagging behind.
- **Solution**: Check network connectivity and ensure the replica is able to connect to the master node. Monitor replication lag with this query (run via `psql` on the master):
```sql
SELECT * FROM pg_stat_replication;
```
## Monitoring
- **Monitor replication status** (run via `psql` on the master):
```sql
SELECT * FROM pg_stat_replication;
```
- **Monitor database health**:
```bash
pg_isready
```
## Additional Information
- [PostgreSQL Streaming Replication Documentation](https://www.postgresql.org/docs/current/warm-standby.html)

View File

@ -1,79 +1,69 @@
#!/bin/bash
#
# Script Name: db2_zfs_backup.sh
# Description: Creates a raw base backup of PostgreSQL on zcluster.technodrome2
#              using pg_basebackup in plain (directory) mode, pulls it to The
#              Vault's ZFS dataset with rsync, then snapshots the dataset for
#              long-term retention.
# Requirements: ssh access to the source server; rsync and zfs on this host;
#               pg_basebackup installed on the source server.
# Usage: ./db2_zfs_backup.sh
# Author: Doc @ Genesis Ops
# Date: 2025-05-12
#

set -euo pipefail

### CONFIGURATION ###
SOURCE_SERVER="zcluster.technodrome2.sshjunkie.com"  # source database server
SOURCE_USER="doc"                                    # ssh user on the source server
PG_USER="postgres"                                   # PostgreSQL role for pg_basebackup
SOURCE_BASE_DIR="/tmp/db2_backup"                    # staging directory on the source server
BACKUP_LABEL="$(date +%Y%m%d%H%M)"                   # timestamp label for this run
REMOTE_BACKUP_DIR="$SOURCE_BASE_DIR/$BACKUP_LABEL"

# Local destination (this host = The Vault)
DEST_DATASET="vaultpool/postgresql/db2"              # ZFS dataset to snapshot
DEST_MOUNT="/nexus/postgresql/db2"                   # must be the mountpoint of $DEST_DATASET
FULL_DEST="$DEST_MOUNT/$BACKUP_LABEL"
#####################

# Ensure the tools THIS host needs are available.
# (pg_basebackup runs on the remote server, so it is not checked here.)
for tool in ssh rsync zfs; do
  command -v "$tool" >/dev/null 2>&1 || { echo "❌ $tool not found, exiting." >&2; exit 1; }
done

echo "🚀 Starting ZFS-aware base backup for db2 from $SOURCE_SERVER..."

# Step 1: create the staging dir and run pg_basebackup on the source server.
# -Fp (plain layout) + -R writes a ready-to-start standby data dir; -X fetch
# includes the WAL needed for a consistent restore.
if ! ssh "$SOURCE_USER@$SOURCE_SERVER" \
    "sudo mkdir -p '$REMOTE_BACKUP_DIR' && \
     sudo pg_basebackup -h localhost -D '$REMOTE_BACKUP_DIR' -U $PG_USER -Fp -R -X fetch -P"; then
  echo "❌ pg_basebackup failed on $SOURCE_SERVER." >&2
  exit 1
fi
echo "📦 Backup created on $SOURCE_SERVER at $REMOTE_BACKUP_DIR"

# Step 2: pull the backup with rsync (preserves structure + timestamps),
# retrying transient network failures a few times before giving up.
echo "🔄 Syncing backup to The Vault at $FULL_DEST..."
mkdir -p "$FULL_DEST"
max_retries=3
retry=0
until rsync -avz --progress "$SOURCE_USER@$SOURCE_SERVER:$REMOTE_BACKUP_DIR/" "$FULL_DEST/"; do
  retry=$((retry + 1))
  if [ "$retry" -ge "$max_retries" ]; then
    echo "❌ rsync transfer failed after $max_retries attempts!" >&2
    exit 1
  fi
  echo "Retrying rsync transfer... attempt $retry"
  sleep 5
done

# Step 3: snapshot the destination dataset so this backup is retained.
SNAPSHOT_NAME="${DEST_DATASET}@${BACKUP_LABEL}"
echo "📸 Creating ZFS snapshot: $SNAPSHOT_NAME"
if zfs snapshot "$SNAPSHOT_NAME"; then
  echo "✅ Snapshot $SNAPSHOT_NAME created successfully."
else
  echo "❌ Snapshot creation failed." >&2
  exit 1
fi

# Step 4: clean up the staging copy on the source server (best effort —
# a failed cleanup should not mark the backup itself as failed).
echo "🧹 Cleaning up temporary backup on $SOURCE_SERVER..."
ssh "$SOURCE_USER@$SOURCE_SERVER" "sudo rm -rf '$REMOTE_BACKUP_DIR'" \
  || echo "⚠️ Cleanup failed on $SOURCE_SERVER — remove $REMOTE_BACKUP_DIR manually." >&2

echo "🎉 Backup and ZFS snapshot complete. Stored in $FULL_DEST"

View File

@ -0,0 +1,101 @@
#!/bin/bash
# Krang fleet health report: collects memory/swap/disk/load/Apache status from
# each server over ssh, checks Krang's own clock against NTP (auto-correcting
# large drift), logs the summary, and sends it to Telegram.
#
# Note: deliberately no `set -e` — one unreachable host must not abort the
# whole report.
set -uo pipefail

# === CONFIG ===
REMOTE_USER="doc"
# SECURITY: prefer supplying these via environment variables / a secret store.
# The hardcoded fallbacks below should be rotated and removed.
# TODO(doc): move BOT_TOKEN and CHAT_ID out of the script.
BOT_TOKEN="${BOT_TOKEN:-8178867489:AAH0VjN7VnZSCIWasSz_y97iBLLjPJA751k}"
CHAT_ID="${CHAT_ID:-1559582356}"
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
LOGFILE="$HOME/krang-logs/health-$(date '+%Y%m%d-%H%M').log"

# Thresholds
SWAP_LIMIT_MB=512   # alert when swap usage exceeds this many MiB
LOAD_LIMIT=4.0      # alert when the 1-minute load average exceeds this

mkdir -p "$HOME/krang-logs"

SERVERS=(
  thevault.sshjunkie.com
  zcluster.technodrome1.sshjunkie.com
  zcluster.technodrome2.sshjunkie.com
  shredder.sshjunkie.com
  chatwithus.live
)

SUMMARY="📡 Krang System Health Report - $TIMESTAMP
"

for HOST in "${SERVERS[@]}"; do
  echo "🔍 Collecting from $HOST..."
  # Quoted heredoc delimiter: the script body expands on the REMOTE host only.
  DATA=$(ssh "$REMOTE_USER@$HOST" bash -s << 'EOF'
HOST=$(hostname)
MEM=$(free -h | awk '/Mem:/ {print $4 " free"}')
SWAP_RAW=$(free -m | awk '/Swap:/ {print $3}')
SWAP="$SWAP_RAW Mi used"
DISK=$(df -h / | awk 'NR==2 {print $4 " free"}')
LOAD=$(uptime | awk -F'load average:' '{print $2}' | cut -d, -f1 | xargs)
APACHE=$(systemctl is-active apache2 2>/dev/null || systemctl is-active httpd 2>/dev/null)
[ "$APACHE" = "active" ] && APACHE_STATUS="✅ Apache running" || APACHE_STATUS="❌ Apache not running"
echo "$HOST|$MEM|$SWAP_RAW|$SWAP|$DISK|$LOAD|$APACHE_STATUS"
EOF
  )

  # An unreachable host yields empty DATA; report it and keep going.
  if [ -z "$DATA" ]; then
    SUMMARY+="🖥️ $HOST — ❌ UNREACHABLE
"
    continue
  fi

  IFS='|' read -r H MEM SWAP_MB SWAP_HUMAN DISK LOAD1 APACHE_STATUS <<< "$DATA"

  ALERTS=""
  # Guard the arithmetic test: SWAP_MB may be empty/non-numeric (e.g. no swap).
  if [[ "$SWAP_MB" =~ ^[0-9]+$ ]] && (( SWAP_MB > SWAP_LIMIT_MB )); then
    ALERTS+="⚠️ HIGH SWAP ($SWAP_HUMAN)
"
  fi
  # Float comparison via awk; pass values with -v, never by string interpolation.
  if [ "$(awk -v l="$LOAD1" -v lim="$LOAD_LIMIT" 'BEGIN {print (l > lim) ? 1 : 0}')" -eq 1 ]; then
    ALERTS+="⚠️ HIGH LOAD ($LOAD1)
"
  fi

  ALERTS_MSG=""
  [ -n "$ALERTS" ] && ALERTS_MSG="🚨 ALERTS:
$ALERTS"

  SUMMARY+="🖥️ $H
• Mem: $MEM
• Swap: $SWAP_HUMAN
• Disk: $DISK
• Load: $LOAD1
$APACHE_STATUS
$ALERTS_MSG
"
done

# === KRANG CLOCK ACCURACY CHECK ===
NTP_RESULT=$(ntpdate -q time.google.com 2>&1)
# NOTE(review): field 10 assumes classic ntpdate -q output — confirm on this distro.
OFFSET=$(echo "$NTP_RESULT" | awk '/offset/ {print $10}')
# BUGFIX: the previous inline awk used nested double quotes
#   awk "BEGIN {printf "%.0f", ...}"
# which the shell split apart, making the awk program invalid. Pass the offset
# with -v and keep the awk program single-quoted instead.
OFFSET_MS=""
if [[ -n "$OFFSET" ]]; then
  OFFSET_MS=$(awk -v o="$OFFSET" 'BEGIN {printf "%.0f", o * 1000}')
fi

if [[ -n "$OFFSET_MS" ]]; then
  if (( OFFSET_MS > 500 || OFFSET_MS < -500 )); then
    # Drift beyond ±500ms: auto-correct the system clock.
    CORRECTION=$(ntpdate -u time.google.com 2>&1)
    SUMMARY+="🛠️ Auto-corrected Krang clock via ntpdate: $CORRECTION
"
    SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ⚠️ OUT OF SYNC
"
  else
    SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ✅ SYNCHRONIZED
"
  fi
else
  SUMMARY+="🕰️ Krang Clock Check: ❌ FAILED to retrieve offset.
"
fi

# Log to file (printf, not echo -e: SUMMARY contains arbitrary remote text).
printf '%s\n' "$SUMMARY" > "$LOGFILE"

# Send to Telegram; --data-urlencode preserves newlines and emoji in the body.
curl -s -X POST "https://api.telegram.org/bot$BOT_TOKEN/sendMessage" \
  --data-urlencode chat_id="$CHAT_ID" \
  --data-urlencode text="$SUMMARY"

0
miscellaneous/bash/rsync_zfs_sync_helper.sh Normal file → Executable file
View File

View File

@ -60,7 +60,7 @@
]
},
"chaos": {
"recording": true,
"recording": false,
"duration": 10800,
"schedule": [
{