From 0732043d62f8b384cfbbba7a5c1f8ceb34311a2c Mon Sep 17 00:00:00 2001
From: DocTator
Date: Fri, 16 May 2025 17:00:01 -0400
Subject: [PATCH] Auto-commit from giteapush.sh at 2025-05-16 17:00:01

---
 documents/blog/minio.md                    |  77 ++++++++++++
 miscellaneous/bash/dasystemisdownyo.sh     | 130 +++++++++++++++++++++
 miscellaneous/bash/genesis_check.sh        |  24 ++++
 miscellaneous/bash/health.sh               | 107 -----------------
 miscellaneous/bash/krang_modular_health.sh | 112 ++++++++++++++++++
 miscellaneous/python/uptime_server.py      |  48 ++++++++
 6 files changed, 391 insertions(+), 107 deletions(-)
 create mode 100644 documents/blog/minio.md
 create mode 100755 miscellaneous/bash/dasystemisdownyo.sh
 create mode 100755 miscellaneous/bash/genesis_check.sh
 delete mode 100755 miscellaneous/bash/health.sh
 create mode 100755 miscellaneous/bash/krang_modular_health.sh
 create mode 100644 miscellaneous/python/uptime_server.py

diff --git a/documents/blog/minio.md b/documents/blog/minio.md
new file mode 100644
index 0000000..f9b8d58
--- /dev/null
+++ b/documents/blog/minio.md
@@ -0,0 +1,77 @@
+# MinIO: It Works, But It Hates You
+
+*By someone who survived a 150,000-file sync and lived to tell the tale.*
+
+---
+
+MinIO is fast. It's lightweight. It's compatible with Amazon S3. It's everything you want in a self-hosted object storage system.
+
+Until you try to **use it like a filesystem**.
+
+Then it becomes the most temperamental, moody, selectively mute piece of software you've ever met.
+
+---
+
+## What I Was Trying to Do
+
+All I wanted was to migrate ~40GB of Mastodon media from local disk into a MinIO bucket. Nothing fancy. Just a clean `rclone sync` and a pat on the back.
+
+---
+
+## What Actually Happened
+
+- **Load average spiked to 33**
+- `find` froze
+- `rclone size` hung
+- `zfs snapshot` stalled so long I thought the server died
+- The MinIO **UI lied to my face** about how much data was present (5GB when `rclone` said 22GB)
+- Directory paths that looked like files. Files that were secretly directories. I saw `.meta` and `.part.1` in my dreams.
+
+---
+
+## The Root Problem
+
+MinIO is **not** a filesystem.
+
+It's a flat key-value object store that's just *pretending* to be a folder tree. And when you throw 150,000+ nested objects at it — especially from a tool like `rclone` — all the lies unravel.
+
+It keeps going, but only if:
+
+- You feed it one file at a time
+- You don't ask it questions (`rclone ls`, `rclone size`, `find`, etc.)
+- You don't use the UI expecting it to reflect reality
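+
+Here's what that flatness looks like from the client side. A minimal sketch with the MinIO client (`mc`); the `myminio` alias, the bucket name, and the output shown are stand-ins, not the real deployment:
+
+```bash
+# "Folders" in the console are just shared key prefixes.
+mc ls --recursive myminio/mastodon-media | head -n 3
+# [2025-05-16 16:58:01 EDT]  48KiB media_attachments/files/110/123/original/cat.png
+# ...
+# There is no directory object to stat anywhere; every "path" is one flat key,
+# and the tree you see is reconstructed from key names on the fly.
+```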
+
+---
+
+## The Fixes That Kept Me Sane
+
+- Switched from `rclone ls` to `rclone size` with `--json` (when it worked)
+- Cleaned up thousands of broken `.meta`/`.part.*` directories using a targeted script
+- Paused `rclone` mid-sync with `kill -STOP` to let snapshots complete
+- Used `du -sh` instead of `find` to track usage
+- Lowered `rclone` concurrency with `--transfers=4 --checkers=4` (see the sketch below)
+- Drank water. A lot of it.
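+
+For the record, here's roughly the shape the gentler sync took. A sketch, not gospel: the source path, remote name, and ZFS dataset are all stand-ins.
+
+```bash
+# Throttled sync: fewer parallel transfers and checkers keep MinIO calm.
+rclone sync /var/lib/mastodon/public/system minio:mastodon-media \
+  --transfers=4 --checkers=4 --progress
+
+# Need a clean ZFS snapshot mid-run? Freeze rclone instead of killing it.
+kill -STOP "$(pgrep -f 'rclone sync')"     # pause the sync
+zfs snapshot tank/mastodon@pre-migration   # assumed dataset name
+kill -CONT "$(pgrep -f 'rclone sync')"     # resume where it left off
+```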
1 : 0}") + [ "$LOAD_HIGH" -eq 1 ] && ALERTS+="⚠️ HIGH LOAD ($LOAD1) +" + fi + + [[ "$ROLES" == *"nginx"* && "$NGINX_STATUS" != "active" ]] && ALERTS+="❌ NGINX not running +" + [[ "$ROLES" == *"docker"* && "$DOCKER_STATUS" != "active" ]] && ALERTS+="❌ Docker not running +" + [[ "$ROLES" == *"postgres"* && "$PGSQL_STATUS" != "active" ]] && ALERTS+="❌ PostgreSQL not running +" + + ALERTS_MSG="" + [ -n "$ALERTS" ] && ALERTS_MSG="🚨 ALERTS: +$ALERTS" + + SUMMARY+="🖥️ $H +• Mem: $MEM +• Swap: $SWAP_HUMAN +• Disk: $DISK +• Load: ${LOAD1:-Unavailable} +• Uptime: $UPTIME_STATUS +• Roles: ${ROLES:-none} +$ALERTS_MSG +" +done + +# === KRANG CLOCK ACCURACY CHECK === +NTP_RESULT=$(ntpdate -q time.google.com 2>&1) +OFFSET=$(echo "$NTP_RESULT" | awk '/offset/ {print $10}') +if [[ "$OFFSET" =~ ^-?[0-9.]+$ ]]; then + OFFSET_MS=$(awk "BEGIN {printf "%.0f", $OFFSET * 1000}") + if (( OFFSET_MS > 500 || OFFSET_MS < -500 )); then + CORRECTION=$(ntpdate -u time.google.com 2>&1) + SUMMARY+="🛠️ Auto-corrected Krang clock via ntpdate: $CORRECTION +" + SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ⚠️ OUT OF SYNC +" + else + SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ✅ SYNCHRONIZED +" + fi +else + SUMMARY+="🕰️ Krang Clock Check: ❌ FAILED to retrieve offset. +" +fi + +# Log to file +echo -e "$SUMMARY" > "$LOGFILE" + +# Send to Telegram +curl -s -X POST https://api.telegram.org/bot$BOT_TOKEN/sendMessage \ + -d chat_id="$CHAT_ID" \ + -d text="$SUMMARY" diff --git a/miscellaneous/bash/genesis_check.sh b/miscellaneous/bash/genesis_check.sh new file mode 100755 index 0000000..d1ab702 --- /dev/null +++ b/miscellaneous/bash/genesis_check.sh @@ -0,0 +1,24 @@ +#!/bin/bash +mkdir -p /var/log/genesis_uptime + +declare -A services=( + [radio]="https://genesis-radio.net" + [mastodon]="https://chatwithus.live" + [minio]="https://console.sshjunkie.com" + [azura]="https://portal.genesishostingtechnologies.com" + [teamtalk]="http://tt.themediahub.org" + [directadmin]="https://da.genesishostingtechnologies.com" +) + +timestamp=$(date -u +"%Y-%m-%dT%H:%M:%S") + +for service in "${!services[@]}" +do + url=${services[$service]} + curl --head --silent --max-time 10 "$url" >/dev/null + if [ $? 
diff --git a/miscellaneous/bash/genesis_check.sh b/miscellaneous/bash/genesis_check.sh
new file mode 100755
index 0000000..d1ab702
--- /dev/null
+++ b/miscellaneous/bash/genesis_check.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+mkdir -p /var/log/genesis_uptime
+
+declare -A services=(
+    [radio]="https://genesis-radio.net"
+    [mastodon]="https://chatwithus.live"
+    [minio]="https://console.sshjunkie.com"
+    [azura]="https://portal.genesishostingtechnologies.com"
+    [teamtalk]="http://tt.themediahub.org"
+    [directadmin]="https://da.genesishostingtechnologies.com"
+)
+
+timestamp=$(date -u +"%Y-%m-%dT%H:%M:%S")
+
+for service in "${!services[@]}"
+do
+    url=${services[$service]}
+    # HEAD probe: any response within 10s counts as "up" (no --fail, so HTTP
+    # error codes still count as reachable).
+    if curl --head --silent --max-time 10 "$url" >/dev/null; then
+        echo "$timestamp,up" >> "/var/log/genesis_uptime/$service.log"
+    else
+        echo "$timestamp,down" >> "/var/log/genesis_uptime/$service.log"
+    fi
+done
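+
+# Each run appends one CSV row per service (UTC timestamp), e.g.:
+#   2025-05-16T21:00:01,up
+# uptime_server.py reads these rows back to compute a 24h uptime percentage.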
diff --git a/miscellaneous/bash/health.sh b/miscellaneous/bash/health.sh
deleted file mode 100755
index 07efab7..0000000
--- a/miscellaneous/bash/health.sh
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/bin/bash
-
-# List of servers (replace these with your actual server hostnames or IPs)
-servers=("thevault.sshjunkie.com" "zcluster.technodrome1.sshjunkie.com" "zcluster.technodrome2.sshjunkie.com" "shredder.sshjunkie.com" "root@chatwithus.live") # Add as many servers as needed
-
-# Define log file
-log_file="/home/doc/system_health_report_$(date +%F).log"
-
-# Telegram Bot API Token and Chat ID
-BOT_TOKEN="8178867489:AAH0VjN7VnZSCIWasSz_y97iBLLjPJA751k"
-CHAT_ID="1559582356"
-TELEGRAM_API="https://api.telegram.org/bot$BOT_TOKEN/sendMessage"
-
-# Function to escape Markdown special characters
-escape_markdown() {
-    echo "$1" | sed 's/\([_*[]\)/\\\1/g' # Escape _, *, [, and ]
-}
-
-# Function to send the report to Telegram
-send_to_telegram() {
-    local message=$1
-    local chunk_size=4096
-    local start=0
-    local end=$chunk_size
-
-    while [ ${#message} -gt $start ]; do
-        chunk="${message:$start:$chunk_size}"
-        curl -s -X POST $TELEGRAM_API \
-            -d chat_id=$CHAT_ID \
-            -d text="$chunk" \
-            -d parse_mode="Markdown"
-        start=$end
-        end=$((start + chunk_size))
-    done
-}
-
-# Function to run the checks on each server
-check_health() {
-    server=$1
-    server_report="=== Health Check for $server ===\n"
-
-    # Memory Usage (summary)
-    memory_output=$(ssh $server "free -h")
-    free_memory=$(echo "$memory_output" | awk 'NR==2 {print $4}')
-    if [[ "$free_memory" < "1.0Gi" ]]; then
-        server_report+="Memory: Low memory available! Only $free_memory free, consider adding more RAM.\n"
-    else
-        server_report+="Memory: Sufficient memory available ($free_memory free).\n"
-    fi
-
-    # Swap Usage (summary)
-    swap_output=$(ssh $server "swapon -s")
-    swap_used=$(echo "$swap_output" | awk '{if(NR>1) print $3}')
-    if [[ "$swap_used" > "1Gi" ]]; then
-        server_report+="Swap: High swap usage ($swap_used used). This may indicate memory pressure.\n"
-    else
-        server_report+="Swap: Minimal swap usage ($swap_used used).\n"
-    fi
-
-    # CPU Load (summary)
-    cpu_output=$(ssh $server "uptime")
-    load_avg=$(echo "$cpu_output" | awk '{print $10}' | sed 's/,//')
-    if (( $(echo "$load_avg > 2.0" | bc -l) )); then
-        server_report+="CPU Load: High load average ($load_avg). Check if any processes are consuming too much CPU.\n"
-    else
-        server_report+="CPU Load: Normal load average ($load_avg).\n"
-    fi
-
-    # Disk Space (summary)
-    disk_output=$(ssh $server "df -h")
-    disk_free=$(echo "$disk_output" | grep -v 'tmpfs' | grep -v 'Filesystem' | awk '{print $4}' | sed 's/[A-Za-z]*//g' | head -n 1)
-    if [[ "$disk_free" < "10G" ]]; then
-        server_report+="Disk Space: Low disk space! Only $disk_free free. Consider cleaning up or adding storage.\n"
-    else
-        server_report+="Disk Space: Sufficient disk space ($disk_free free).\n"
-    fi
-
-    # Service Check (example: check if Apache is running)
-    apache_status=$(ssh $server "systemctl is-active apache2")
-    if [[ "$apache_status" == "active" ]]; then
-        server_report+="Apache: Running normally.\n"
-    else
-        server_report+="Apache: Not running. Check service logs for issues.\n"
-    fi
-
-    # Additional Checks (summary: disk I/O, uptime)
-    iostat_output=$(ssh $server "iostat | head -n 2") # Get summary of disk I/O
-    uptime_output=$(ssh $server "uptime")
-    server_report+="Disk I/O: $iostat_output\n"
-    server_report+="Uptime: $uptime_output\n"
-
-    # Escape Markdown special characters
-    escaped_report=$(escape_markdown "$server_report")
-
-    # Send the server-specific summary to Telegram
-    send_to_telegram "$escaped_report"
-
-    # Separator for readability in log file
-    echo -e "\n=====================\n" >> $log_file
-}
-
-# Main loop to go through each server
-for server in "${servers[@]}"; do
-    check_health $server
-done
-
-echo "Health check completed. Reports sent to Telegram and saved to $log_file."
diff --git a/miscellaneous/bash/krang_modular_health.sh b/miscellaneous/bash/krang_modular_health.sh
new file mode 100755
index 0000000..7b781e3
--- /dev/null
+++ b/miscellaneous/bash/krang_modular_health.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+
+# === CONFIG ===
+REMOTE_USER="doc"
+BOT_TOKEN="8178867489:AAH0VjN7VnZSCIWasSz_y97iBLLjPJA751k"
+CHAT_ID="1559582356"
+TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')
+LOGFILE="$HOME/krang-logs/health-$(date '+%Y%m%d-%H%M').log"
+
+# Thresholds
+SWAP_LIMIT_MB=512
+LOAD_LIMIT=4.0
+
+mkdir -p "$HOME/krang-logs"
+
+SERVERS=(
+    thevault.sshjunkie.com
+    zcluster.technodrome1.sshjunkie.com
+    zcluster.technodrome2.sshjunkie.com
+    shredder.sshjunkie.com
+    chatwithus.live
+)
+
+SUMMARY="📡 Krang System Health Report - $TIMESTAMP
+
+"
+
+for HOST in "${SERVERS[@]}"; do
+    echo "🔍 Collecting from $HOST..."
+
+    DATA=$(ssh "$REMOTE_USER@$HOST" bash -s << 'EOF'
+HOST=$(hostname)
+MEM=$(awk '/MemAvailable/ {printf "%.1f Gi free", $2 / 1024 / 1024}' /proc/meminfo)
+SWAP_RAW=$(free -m | awk '/Swap:/ {print $3}')
+SWAP="$SWAP_RAW Mi used"
+DISK=$(df -h / | awk 'NR==2 {print $4 " free"}')
+LOAD=$(uptime | awk -F'load average:' '{print $2}' | cut -d, -f1 | xargs)
+UPTIME=$(uptime -p)
+
+# Optional service checks. Query the fallback unit only if the first is not
+# active, so the variable always holds a single word.
+NGINX=$(systemctl is-active nginx 2>/dev/null)
+DOCKER=$(systemctl is-active docker 2>/dev/null)
+PGSQL=$(systemctl is-active postgresql 2>/dev/null)
+[ "$PGSQL" = "active" ] || PGSQL=$(systemctl is-active postgresql@14-main 2>/dev/null)
+
+echo "$HOST|$MEM|$SWAP_RAW|$SWAP|$DISK|$LOAD|$UPTIME|$NGINX|$DOCKER|$PGSQL"
+EOF
+)
+
+    IFS='|' read -r H MEM SWAP_MB SWAP_HUMAN DISK LOAD1 UPTIME_STATUS NGINX_STATUS DOCKER_STATUS PGSQL_STATUS <<< "$DATA"
+
+    ALERTS=""
+    # Guard against non-numeric input (e.g. an unreachable host) before comparing.
+    if [[ "$SWAP_MB" =~ ^[0-9]+$ ]] && (( SWAP_MB > SWAP_LIMIT_MB )); then
+        ALERTS+="⚠️ HIGH SWAP ($SWAP_HUMAN)
+"
+    fi
+
+    if [[ "$LOAD1" =~ ^[0-9.]+$ ]]; then
+        LOAD_INT=$(awk "BEGIN {print ($LOAD1 > $LOAD_LIMIT) ? 1 : 0}")
+        if [ "$LOAD_INT" -eq 1 ]; then
+            ALERTS+="⚠️ HIGH LOAD ($LOAD1)
+"
+        fi
+    fi
+
+    [ "$NGINX_STATUS" != "active" ] && ALERTS+="❌ NGINX not running
+"
+    [ "$DOCKER_STATUS" != "active" ] && ALERTS+="❌ Docker not running
+"
+    [ "$PGSQL_STATUS" != "active" ] && ALERTS+="❌ PostgreSQL not running
+"
+
+    ALERTS_MSG=""
+    [ -n "$ALERTS" ] && ALERTS_MSG="🚨 ALERTS:
+$ALERTS"
+
+    SUMMARY+="🖥️ $H
+• Mem: $MEM
+• Swap: $SWAP_HUMAN
+• Disk: $DISK
+• Load: $LOAD1
+• Uptime: $UPTIME_STATUS
+$ALERTS_MSG
+"
+done
+
+# === KRANG CLOCK ACCURACY CHECK ===
+NTP_RESULT=$(ntpdate -q time.google.com 2>&1)
+OFFSET=$(echo "$NTP_RESULT" | awk '/offset/ {print $10}')
+
+if [[ "$OFFSET" =~ ^-?[0-9.]+$ ]]; then
+    # Convert the offset (seconds) to whole milliseconds.
+    OFFSET_MS=$(awk "BEGIN {printf \"%.0f\", $OFFSET * 1000}")
+    if (( OFFSET_MS > 500 || OFFSET_MS < -500 )); then
+        # Auto-correct the system clock
+        CORRECTION=$(ntpdate -u time.google.com 2>&1)
+        SUMMARY+="🛠️ Auto-corrected Krang clock via ntpdate: $CORRECTION
+"
+        SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ⚠️ OUT OF SYNC
+"
+    else
+        SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ✅ SYNCHRONIZED
+"
+    fi
+else
+    SUMMARY+="🕰️ Krang Clock Check: ❌ FAILED to retrieve offset.
+"
+fi
+
+# Log to file
+echo -e "$SUMMARY" > "$LOGFILE"
+
+# Send to Telegram
+curl -s -X POST https://api.telegram.org/bot$BOT_TOKEN/sendMessage \
+    -d chat_id="$CHAT_ID" \
+    -d text="$SUMMARY"
1 : 0}") + if [ "$LOAD_INT" -eq 1 ]; then + ALERTS+="⚠️ HIGH LOAD ($LOAD1) +" + fi + + [ "$NGINX_STATUS" != "active" ] && ALERTS+="❌ NGINX not running +" + [ "$DOCKER_STATUS" != "active" ] && ALERTS+="❌ Docker not running +" + [ "$PGSQL_STATUS" != "active" ] && ALERTS+="❌ PostgreSQL not running +" + + ALERTS_MSG="" + [ -n "$ALERTS" ] && ALERTS_MSG="🚨 ALERTS: +$ALERTS" + + SUMMARY+="🖥️ $H +• Mem: $MEM +• Swap: $SWAP_HUMAN +• Disk: $DISK +• Load: $LOAD1 +• Uptime: $UPTIME_STATUS +$ALERTS_MSG +" +done + +# === KRANG CLOCK ACCURACY CHECK === +NTP_RESULT=$(ntpdate -q time.google.com 2>&1) +OFFSET=$(echo "$NTP_RESULT" | awk '/offset/ {print $10}') +OFFSET_MS=$(awk "BEGIN {printf "%.0f", $OFFSET * 1000}") + +if [[ -n "$OFFSET_MS" ]]; then + if (( OFFSET_MS > 500 || OFFSET_MS < -500 )); then + # Auto-correct the system clock + CORRECTION=$(ntpdate -u time.google.com 2>&1) + SUMMARY+="🛠️ Auto-corrected Krang clock via ntpdate: $CORRECTION +" + SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ⚠️ OUT OF SYNC +" + else + SUMMARY+="🕰️ Krang Clock Offset: ${OFFSET_MS}ms — ✅ SYNCHRONIZED +" + fi +else + SUMMARY+="🕰️ Krang Clock Check: ❌ FAILED to retrieve offset. +" +fi + +# Log to file +echo -e "$SUMMARY" > "$LOGFILE" + +# Send to Telegram +curl -s -X POST https://api.telegram.org/bot$BOT_TOKEN/sendMessage \ + -d chat_id="$CHAT_ID" \ + -d text="$SUMMARY" diff --git a/miscellaneous/python/uptime_server.py b/miscellaneous/python/uptime_server.py new file mode 100644 index 0000000..e89acb3 --- /dev/null +++ b/miscellaneous/python/uptime_server.py @@ -0,0 +1,48 @@ +from flask import Flask, jsonify +import os +import json +from datetime import datetime, timedelta + +app = Flask(__name__) + +LOG_DIR = "/var/log/genesis_uptime" +CHECK_WINDOW_HOURS = 24 + +SERVICES = { + "radio": "https://genesis-radio.net", + "mastodon": "https://chatwithus.live", + "minio": "https://console.sshjunkie.com", + "azura": "https://portal.genesishostingtechnologies.com", + "teamtalk": "http://tt.themediahub.org", + "directadmin": "https://da.genesishostingtechnologies.com" +} + +@app.route("/api/uptime/") +def get_uptime(service): + log_path = os.path.join(LOG_DIR, f"{service}.log") + if not os.path.exists(log_path): + return jsonify({"uptime": "n/a"}), 404 + + now = datetime.utcnow() + window_start = now - timedelta(hours=CHECK_WINDOW_HOURS) + + total = 0 + up = 0 + + with open(log_path, "r") as f: + for line in f: + try: + timestamp_str, status = line.strip().split(",") + timestamp = datetime.strptime(timestamp_str, "%Y-%m-%dT%H:%M:%S") + if timestamp >= window_start: + total += 1 + if status == "up": + up += 1 + except Exception: + continue + + uptime_percent = round((up / total) * 100, 2) if total > 0 else 0.0 + return jsonify({"uptime": uptime_percent}) + +if __name__ == "__main__": + app.run(host="0.0.0.0", port=5000)