Auto-commit from giteapush.sh at 2025-05-14 07:12:51
This commit is contained in:
parent
e4f1e3396c
commit
0739d35b2f
87
documents/procedures/databasecluster.md
Normal file
87
documents/procedures/databasecluster.md
Normal file
@ -0,0 +1,87 @@
|
|||||||
|
# Database Cluster (baboon.sshjunkie.com)
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
The database cluster consists of two PostgreSQL database servers hosted on `baboon.sshjunkie.com`. These servers are used to store data for services such as Mastodon and AzuraCast. The cluster ensures high availability and fault tolerance through replication and backup strategies.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
Install PostgreSQL on both nodes in the cluster:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Update package list and install PostgreSQL
|
||||||
|
sudo apt update
|
||||||
|
sudo apt install -y postgresql postgresql-contrib
|
||||||
|
|
||||||
|
# Ensure PostgreSQL is running
|
||||||
|
sudo systemctl start postgresql
|
||||||
|
sudo systemctl enable postgresql
|
||||||
|
```
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
### PostgreSQL Configuration Files:
|
||||||
|
- **pg_hba.conf**:
|
||||||
|
- Allow replication and local connections.
|
||||||
|
- Example:
|
||||||
|
```ini
|
||||||
|
local all postgres md5
|
||||||
|
host replication all 192.168.0.0/16 md5
|
||||||
|
```
|
||||||
|
- **postgresql.conf**:
|
||||||
|
- Set `wal_level` for replication:
|
||||||
|
```ini
|
||||||
|
wal_level = replica   # "hot_standby" is a deprecated alias for "replica" since PostgreSQL 9.6
|
||||||
|
max_wal_senders = 3
|
||||||
|
```
|
||||||
|
|
||||||
|
### Replication Configuration:
|
||||||
|
- Set up streaming replication between the two nodes (`baboon.sshjunkie.com` as the master and the second node as the replica).
|
||||||
|
|
||||||
|
1. On the master node, enable replication and restart PostgreSQL.
|
||||||
|
2. On the replica node, copy the data directory from the master node (e.g. with `pg_basebackup`) and configure the `recovery.conf` file (PostgreSQL 11 and earlier; on PostgreSQL 12+ `recovery.conf` was removed — create a `standby.signal` file and put the connection settings in `postgresql.auto.conf` instead).
|
||||||
|
|
||||||
|
Example `recovery.conf` on the replica:
|
||||||
|
```ini
|
||||||
|
standby_mode = on
|
||||||
|
primary_conninfo = 'host=baboon.sshjunkie.com port=5432 user=replicator password=your_password'
|
||||||
|
trigger_file = '/tmp/postgresql.trigger.5432'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
- **Check the status of PostgreSQL**:
|
||||||
|
```bash
|
||||||
|
sudo systemctl status postgresql
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Promote the replica to master**:
|
||||||
|
```bash
|
||||||
|
pg_ctl promote -D /var/lib/postgresql/data
|
||||||
|
```
|
||||||
|
|
||||||
|
## Backups
|
||||||
|
Use `pg_basebackup` to create full backups of the cluster. Example:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pg_basebackup -h baboon.sshjunkie.com -U replicator -D /backups/db_backup -Ft -z -P
|
||||||
|
```
|
||||||
|
|
||||||
|
Automate backups with cronjobs for regular snapshots.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
- **Issue**: Replica is lagging behind.
|
||||||
|
- **Solution**: Check network connectivity and ensure the replica is able to connect to the master node. Monitor replication lag with:
|
||||||
|
```bash
|
||||||
|
SELECT * FROM pg_stat_replication;
|
||||||
|
```
|
||||||
|
|
||||||
|
## Monitoring
|
||||||
|
- **Monitor replication status**:
|
||||||
|
```bash
|
||||||
|
SELECT * FROM pg_stat_replication;
|
||||||
|
```
|
||||||
|
|
||||||
|
- **Monitor database health**:
|
||||||
|
```bash
|
||||||
|
pg_isready
|
||||||
|
```
|
||||||
|
|
||||||
|
## Additional Information
|
||||||
|
- [PostgreSQL Streaming Replication Documentation](https://www.postgresql.org/docs/current/warm-standby.html)
|
@ -1,79 +1,69 @@
|
|||||||
#!/bin/bash
#
# Script Name:  db2_zfs_backup.sh
# Description:  Creates a raw base backup of PostgreSQL on zcluster.technodrome2
#               using pg_basebackup in plain (directory) mode, pulls it to The
#               Vault's ZFS dataset with rsync, then snapshots the dataset for
#               long-term retention.
# Requirements: pg_basebackup (on the source), SSH access, rsync,
#               ZFS dataset available at the destination
# Usage:        ./db2_zfs_backup.sh
# Author:       Doc @ Genesis Ops
# Date:         2025-05-12
#

### CONFIGURATION ###
SOURCE_SERVER="zcluster.technodrome2.sshjunkie.com"   # Source server (database server)
SOURCE_USER="doc"
PG_USER="postgres"
SOURCE_BASE_DIR="/tmp/db2_backup"                     # Staging dir on the remote node
BACKUP_LABEL="$(date +%Y%m%d%H%M)"                    # Timestamp label, e.g. 202505141200
REMOTE_BACKUP_DIR="$SOURCE_BASE_DIR/$BACKUP_LABEL"

# Remote source rclone config (optional, kept for compatibility with older runs)
SOURCE_REMOTE="technodrome2"

# Local destination
DEST_DATASET="vaultpool/postgresql/db2"               # ZFS dataset to snapshot; adjust as needed
DEST_MOUNT="/nexus/postgresql/db2"                    # Must be the mountpoint for $DEST_DATASET
FULL_DEST="$DEST_MOUNT/$BACKUP_LABEL"
#####################

echo "🚀 Starting ZFS-aware base backup for db2 from $SOURCE_SERVER..."

# Step 1: create the staging dir and run pg_basebackup on the source server.
# -Fp = plain directory layout, -R = write standby/recovery settings,
# -X fetch = include the WAL needed to make the backup consistent.
if ! ssh "$SOURCE_USER@$SOURCE_SERVER" "sudo mkdir -p '$REMOTE_BACKUP_DIR' && \
    sudo pg_basebackup -h localhost -D '$REMOTE_BACKUP_DIR' -U $PG_USER -Fp -R -X fetch -P"; then
  echo "❌ pg_basebackup failed on $SOURCE_SERVER."
  exit 1
fi

echo "📦 Backup created on $SOURCE_SERVER at $REMOTE_BACKUP_DIR"

# Step 2: pull the backup using rsync (preserves structure + timestamps).
echo "🔄 Syncing backup to The Vault at $FULL_DEST..."
mkdir -p "$FULL_DEST"
if ! rsync -avz --progress "$SOURCE_USER@$SOURCE_SERVER:$REMOTE_BACKUP_DIR/" "$FULL_DEST/"; then
  echo "❌ rsync transfer failed!"
  exit 1
fi

# Step 3: snapshot the full ZFS backup dataset so this backup is retained
# even after later syncs overwrite the mountpoint contents.
SNAPSHOT_NAME="${DEST_DATASET}@${BACKUP_LABEL}"
echo "📸 Creating ZFS snapshot: $SNAPSHOT_NAME"
if zfs snapshot "$SNAPSHOT_NAME"; then
  echo "✅ Snapshot $SNAPSHOT_NAME created successfully."
else
  echo "❌ Snapshot creation failed."
  exit 1
fi

# Step 4 (optional): clean up the staging copy on the source server.
echo "🧹 Cleaning up temporary backup on $SOURCE_SERVER..."
ssh "$SOURCE_USER@$SOURCE_SERVER" "sudo rm -rf '$REMOTE_BACKUP_DIR'"

echo "🎉 Backup and ZFS snapshot complete. Stored in $FULL_DEST"
101
miscellaneous/bash/pull_health_everywhere_ntp.sh
Executable file
101
miscellaneous/bash/pull_health_everywhere_ntp.sh
Executable file
@ -0,0 +1,101 @@
|
|||||||
|
#!/bin/bash
#
# pull_health_everywhere_ntp.sh
# Collects memory/swap/disk/load/Apache health from a fleet of servers over
# SSH, checks Krang's own clock against NTP (auto-correcting large drift),
# writes the report to a log file, and pushes it to Telegram.

# === CONFIG ===
REMOTE_USER="doc"
# SECURITY NOTE(review): bot token and chat id are hardcoded and were committed
# to the repo — rotate the token and load these from a root-readable config
# file or environment variables instead.
BOT_TOKEN="8178867489:AAH0VjN7VnZSCIWasSz_y97iBLLjPJA751k"
CHAT_ID="1559582356"
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
LOGFILE="$HOME/krang-logs/health-$(date '+%Y%m%d-%H%M').log"

# Alert thresholds
SWAP_LIMIT_MB=512
LOAD_LIMIT=4.0

mkdir -p "$HOME/krang-logs"

SERVERS=(
  thevault.sshjunkie.com
  zcluster.technodrome1.sshjunkie.com
  zcluster.technodrome2.sshjunkie.com
  shredder.sshjunkie.com
  chatwithus.live
)

SUMMARY="📡 Krang System Health Report - $TIMESTAMP

"

for HOST in "${SERVERS[@]}"; do
  echo "🔍 Collecting from $HOST..."

  # Run the probe remotely; the quoted heredoc delimiter prevents local
  # expansion. The probe emits one pipe-delimited status line.
  DATA=$(ssh "$REMOTE_USER@$HOST" bash -s << 'EOF'
HOST=$(hostname)
MEM=$(free -h | awk '/Mem:/ {print $4 " free"}')
SWAP_RAW=$(free -m | awk '/Swap:/ {print $3}')
SWAP="$SWAP_RAW Mi used"
DISK=$(df -h / | awk 'NR==2 {print $4 " free"}')
LOAD=$(uptime | awk -F'load average:' '{print $2}' | cut -d, -f1 | xargs)
APACHE=$(systemctl is-active apache2 2>/dev/null || systemctl is-active httpd 2>/dev/null)
[ "$APACHE" = "active" ] && APACHE_STATUS="✅ Apache running" || APACHE_STATUS="❌ Apache not running"

echo "$HOST|$MEM|$SWAP_RAW|$SWAP|$DISK|$LOAD|$APACHE_STATUS"
EOF
  )

  IFS='|' read -r H MEM SWAP_MB SWAP_HUMAN DISK LOAD1 APACHE_STATUS <<< "$DATA"

  ALERTS=""
  # Guard: SWAP_MB can be empty/non-numeric if the ssh probe failed, which
  # would make the arithmetic test error out.
  if [[ "$SWAP_MB" =~ ^[0-9]+$ ]] && (( SWAP_MB > SWAP_LIMIT_MB )); then
    ALERTS+="⚠️ HIGH SWAP ($SWAP_HUMAN)
"
  fi

  # Float comparison via awk (bash (( )) is integer-only). Pass values with
  # -v instead of interpolating them into the awk program text.
  LOAD_INT=$(awk -v l="$LOAD1" -v lim="$LOAD_LIMIT" 'BEGIN {print (l > lim) ? 1 : 0}')
  if [ "$LOAD_INT" -eq 1 ]; then
    ALERTS+="⚠️ HIGH LOAD ($LOAD1)
"
  fi

  ALERTS_MSG=""
  [ -n "$ALERTS" ] && ALERTS_MSG="🚨 ALERTS:
$ALERTS"

  SUMMARY+="🖥️ $H
• Mem: $MEM
• Swap: $SWAP_HUMAN
• Disk: $DISK
• Load: $LOAD1
• $APACHE_STATUS
$ALERTS_MSG

"
done

# === KRANG CLOCK ACCURACY CHECK ===
NTP_RESULT=$(ntpdate -q time.google.com 2>&1)
OFFSET=$(echo "$NTP_RESULT" | awk '/offset/ {print $10}')
# FIX: the original nested unescaped double quotes inside a double-quoted awk
# program (awk "BEGIN {printf "%.0f", ..."}), which split the program string
# and broke the command. Pass the shell value in with -v instead.
OFFSET_MS=$(awk -v off="$OFFSET" 'BEGIN {printf "%.0f", off * 1000}')

if [[ -n "$OFFSET_MS" ]]; then
  if (( OFFSET_MS > 500 || OFFSET_MS < -500 )); then
    # Drift beyond ±500 ms: step the clock, then report both the correction
    # and the measured drift.
    CORRECTION=$(ntpdate -u time.google.com 2>&1)
    SUMMARY+="🛠️ Auto-corrected Krang clock via ntpdate: $CORRECTION
"
    SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ⚠️ OUT OF SYNC
"
  else
    SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ✅ SYNCHRONIZED
"
  fi
else
  SUMMARY+="🕰️ Krang Clock Check: ❌ FAILED to retrieve offset.
"
fi

# Log to file
echo -e "$SUMMARY" > "$LOGFILE"

# Send to Telegram
curl -s -X POST "https://api.telegram.org/bot$BOT_TOKEN/sendMessage" -d chat_id="$CHAT_ID" -d text="$SUMMARY"
0
miscellaneous/bash/rsync_zfs_sync_helper.sh
Normal file → Executable file
0
miscellaneous/bash/rsync_zfs_sync_helper.sh
Normal file → Executable file
@ -60,7 +60,7 @@
|
|||||||
]
|
]
|
||||||
},
|
},
|
||||||
"chaos": {
|
"chaos": {
|
||||||
"recording": true,
|
"recording": false,
|
||||||
"duration": 10800,
|
"duration": 10800,
|
||||||
"schedule": [
|
"schedule": [
|
||||||
{
|
{
|
||||||
|
Loading…
x
Reference in New Issue
Block a user