"""Genesis Radio infrastructure healthcheck.

Connects to every host in ``NODES`` over SSH, runs the configured checks,
sends a summary to Mastodon as a direct message and writes a small HTML
dashboard to ``HEALTHCHECK_HTML``.
"""

import datetime
import html
import os
import re
import time

import paramiko
import requests

# ==== CONFIG ====
MASTODON_INSTANCE = "https://chatwithus.live"
# SECURITY: this token is committed in source. Rotate it and supply the new
# value through the MASTODON_TOKEN environment variable; the literal is kept
# only as a backward-compatible fallback.
MASTODON_TOKEN = os.environ.get(
    "MASTODON_TOKEN", "rimxBLi-eaJAcwagkmoj6UoW7Lc473tQY0cOM041Euw"
)
MASTODON_USER_ID = "114386383616633367"
HEALTHCHECK_HTML = "/var/www/html/healthcheck.html"
DISK_WARN_THRESHOLD = 10
INODE_WARN_THRESHOLD = 10
LOG_FILES = ["/var/log/syslog", "/var/log/nginx/error.log"]
LOG_PATTERNS = ["ERROR", "FATAL", "disk full", "out of memory"]
SUPPRESSED_PATTERNS = ["SomeKnownHarmlessMastodonError"]

NODES = [
    {
        "name": "shredder",
        "host": "38.102.127.171",
        "ssh_user": "doc",
        "services": ["minio.service"],
        "disks": ["/", "/mnt/raid5"],
        "type": "remote",
        "db": False,
        "raid": True,
    },
    {
        "name": "mastodon",
        "host": "chatwithus.live",
        "ssh_user": "root",
        "services": ["nginx", "mastodon-web"],
        "disks": ["/"],
        "type": "remote",
        "db": False,
        "raid": False,
    },
    {
        "name": "db1",
        "host": "cluster.db1.genesishostingtechnologies.com",
        "ssh_user": "doc",
        "services": ["postgresql@16-main.service"],
        "disks": ["/", "/var/lib/postgresql"],
        "type": "remote",
        "db": True,
        "raid": False,
    },
    {
        "name": "db2",
        "host": "cluster.db2.genesishostingtechnologies.com",
        "ssh_user": "doc",
        "services": ["postgresql@16-main.service"],
        "disks": ["/", "/var/lib/postgresql"],
        "type": "remote",
        "db": True,
        "raid": False,
    },
]

# Emoji are written as escapes so the file survives being re-saved or
# transported in a non-UTF-8 encoding.
EMOJI_FIRECRACKER = "\U0001F9E8"
EMOJI_CHART = "\U0001F4C8"
EMOJI_TURTLE = "\U0001F422"
EMOJI_STOP = "\U0001F6D1"
EMOJI_COLLISION = "\U0001F4A5"
EMOJI_WARNING = "\u26A0\uFE0F"
EMOJI_CHECK = "\u2705"
EMOJI_SIREN = "\U0001F6A8"
EMOJI_BOLT = "\u26A1"

STATUS_HEALTHY = f"{EMOJI_CHECK} Healthy"
STATUS_WARNING = f"{EMOJI_WARNING} Warning"
STATUS_CRITICAL = f"{EMOJI_SIREN} Critical"

# rclone VFS cache size (bytes) above which a warning is raised.
RCLONE_CACHE_WARN_BYTES = 100_000_000_000
_BYTES_USED_RE = re.compile(r'"bytesUsed"\s*:\s*(\d+)')


# ==== Mastodon DM function with retry ====
def mastodon_dm(message, retries=3):
    """Post *message* as a direct-visibility status, retrying on failure.

    Args:
        message: Text of the status to post.
        retries: Maximum number of attempts.

    Returns:
        None. Failures are printed, never raised, so a Mastodon outage
        cannot abort the healthcheck run.
    """
    url = f"{MASTODON_INSTANCE}/api/v1/statuses"
    headers = {"Authorization": f"Bearer {MASTODON_TOKEN}"}
    payload = {
        "status": message,
        "visibility": "direct",
        "in_reply_to_account_id": MASTODON_USER_ID,
    }
    for attempt in range(retries):
        try:
            # Without a timeout a stalled connection would hang the run.
            resp = requests.post(url, headers=headers, data=payload, timeout=15)
        except requests.RequestException as exc:
            failure = str(exc)
        else:
            if resp.status_code == 200:
                return
            failure = resp.text
        print(f"Failed to send Mastodon DM (attempt {attempt+1}): {failure}")
        # No point sleeping once the last attempt has failed.
        if attempt + 1 < retries:
            time.sleep(5)


# ==== SSH command runner ====
def ssh_command(host, user, cmd):
    """Run *cmd* on *host* as *user* over SSH and return its stripped stdout.

    Raises:
        Whatever paramiko or the socket layer raises on connection or
        execution failure; callers are expected to handle it.
    """
    ssh = paramiko.SSHClient()
    # SECURITY: AutoAddPolicy accepts unknown host keys without verification.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect(hostname=host, username=user, timeout=10)
        _stdin, stdout, _stderr = ssh.exec_command(cmd)
        # Log files can contain arbitrary bytes; never fail on decoding.
        return stdout.read().decode(errors="replace").strip()
    finally:
        # Always release the connection, including when connect/exec fails.
        ssh.close()


# ==== Emoji chooser ====
def choose_emoji(line):
    """Return the emoji that best matches the kind of problem in *line*."""
    lowered = line.lower()
    if "RAID" in line:
        return EMOJI_FIRECRACKER
    if "disk" in lowered:
        return EMOJI_CHART
    if "rclone" in lowered:
        return EMOJI_TURTLE
    if "Service" in line:
        return EMOJI_STOP
    if "Replication" in line:
        return EMOJI_COLLISION
    return EMOJI_WARNING


# ==== Check rclone health ====
def check_rclone_health(node):
    """Query ``rclone rc vfs/stats`` on *node* and classify the result.

    Returns:
        ``("critical", message)`` or ``("warning", message)`` when a problem
        is found, otherwise ``None``.
    """
    name = node["name"]
    try:
        result = ssh_command(node["host"], node["ssh_user"], "rclone rc vfs/stats")
    except Exception as e:  # any SSH failure is itself the finding
        return ("critical", f"[{name}] ERROR: Could not check rclone health: {str(e)}")

    lowered = result.lower()
    # NOTE(review): this substring test also fires on any JSON key that merely
    # contains "error"; confirm against real vfs/stats output.
    if "error" in lowered or "failed" in lowered:
        return ("critical", f"[{name}] ERROR: rclone health check failed. Output: {result}")

    # A regex tolerates the value being followed by "}" or whitespace.
    match = _BYTES_USED_RE.search(result)
    if match:
        bytes_used = int(match.group(1))
        if bytes_used > RCLONE_CACHE_WARN_BYTES:
            return (
                "warning",
                f"[{name}] WARNING: rclone cache usage high: {bytes_used} bytes used.",
            )
    return None


# ==== Remote log scan ====
def check_remote_logs(host, user, node_name):
    """Scan the tail of each file in ``LOG_FILES`` for ``LOG_PATTERNS``.

    Lines containing any of ``SUPPRESSED_PATTERNS`` are ignored.

    Returns:
        A list of alert strings, at most one per (pattern, log file) pair so
        that a noisy log cannot flood the report with identical lines.
    """
    alerts = []
    for log in LOG_FILES:
        cmd = f"tail -500 {log}"
        try:
            out = ssh_command(host, user, cmd)
        except Exception as e:  # unreadable log is reported, not fatal
            alerts.append(f"[{node_name}] ERROR: Could not read log {log}: {e}")
            continue
        relevant = [
            line
            for line in out.split("\n")
            if not any(suppress in line for suppress in SUPPRESSED_PATTERNS)
        ]
        for pattern in LOG_PATTERNS:
            if any(pattern in line for line in relevant):
                alerts.append(f"[{node_name}] WARNING: Pattern '{pattern}' in {log}")
    return alerts


def _classify(result, default=None):
    """Map a check result string to ``"critical"``, ``"warning"`` or *default*."""
    if "CRITICAL" in result:
        return "critical"
    if "WARNING" in result:
        return "warning"
    return default


def _write_dashboard(node_status, now):
    """Write the per-node status table to ``HEALTHCHECK_HTML``."""
    parts = [
        "<!DOCTYPE html><html><head><meta charset='utf-8'>"
        "<title>Genesis Radio Healthcheck</title></head><body>",
        "<h1>Genesis Radio System Health</h1>",
        f"<p>Last Checked: {html.escape(now)}</p>",
        "<table border='1' cellpadding='5'>"
        "<tr><th>System</th><th>Status</th></tr>",
    ]
    for node, status in node_status.items():
        color = 'green' if 'Healthy' in status else ('orange' if 'Warning' in status else 'red')
        parts.append(
            f"<tr><td>{html.escape(node)}</td>"
            f"<td style='color:{color};'>{html.escape(status)}</td></tr>"
        )
    parts.append("</table></body></html>")
    # Explicit UTF-8: the status text contains emoji, and the locale default
    # encoding under cron is often ASCII.
    with open(HEALTHCHECK_HTML, "w", encoding="utf-8") as f:
        f.write("".join(parts))


# ==== Main Routine ====
def main():
    """Run every check on every node, send the report and write the dashboard.

    NOTE(review): check_remote_disk, check_remote_service, check_replication
    and check_remote_raid_md0 are called below but are not defined in this
    file as reviewed. They must be provided, otherwise this function raises
    NameError on the first node.
    """
    critical_problems = []
    warning_problems = []
    node_status = {}

    for node in NODES:
        node_critical = []
        node_warning = []

        def record(level, text):
            # Warnings are kept even when the node is already critical so
            # that no finding is lost from the report.
            if level == "critical":
                node_critical.append(text)
            elif level == "warning":
                node_warning.append(text)

        if "rclone" in node.get("services", []):
            res = check_rclone_health(node)
            if res:
                level, msg = res
                record("critical" if level == "critical" else "warning", msg)

        for disk in node["disks"]:
            res = check_remote_disk(node["host"], node["ssh_user"], disk, node["name"])
            if res:
                # Results tagged neither CRITICAL nor WARNING are ignored.
                record(_classify(res), res)

        for svc in node["services"]:
            res = check_remote_service(node["host"], node["ssh_user"], svc, node["name"])
            if res:
                record(_classify(res), res)

        if node.get("db"):
            res = check_replication(node["host"], node["name"])
            if res:
                record("critical", res)

        if node.get("raid", False):
            res = check_remote_raid_md0(node["host"], node["ssh_user"], node["name"])
            if res:
                # Any non-critical RAID finding counts as a warning.
                record(_classify(res, default="warning"), res)

        for alert in check_remote_logs(node["host"], node["ssh_user"], node["name"]):
            record("warning", alert)

        critical_problems.extend(node_critical)
        warning_problems.extend(node_warning)
        if node_critical:
            node_status[node["name"]] = STATUS_CRITICAL
        elif node_warning:
            node_status[node["name"]] = STATUS_WARNING
        else:
            node_status[node["name"]] = STATUS_HEALTHY

    now = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    if critical_problems:
        formatted = "\n".join(f"- {choose_emoji(p)} {p}" for p in critical_problems)
        msg = (
            f"{EMOJI_SIREN} Genesis Radio Critical Healthcheck {now} {EMOJI_SIREN}\n"
            f"{EMOJI_BOLT} {len(critical_problems)} critical issues found:\n{formatted}"
        )
        print(msg)
        mastodon_dm(msg)

    if warning_problems:
        formatted = "\n".join(f"- {choose_emoji(p)} {p}" for p in warning_problems)
        msg = (
            f"{EMOJI_WARNING} Genesis Radio Warning Healthcheck {now} {EMOJI_WARNING}\n"
            f"{EMOJI_BOLT} {len(warning_problems)} warnings found:\n{formatted}"
        )
        print(msg)
        mastodon_dm(msg)

    if not critical_problems and not warning_problems:
        msg = f"{EMOJI_CHECK} Genesis Radio Healthcheck {now}: All systems normal."
        print(msg)
        mastodon_dm(msg)

    # Write healthcheck HTML dashboard
    _write_dashboard(node_status, now)


if __name__ == "__main__":
    main()