#!/bin/bash
# ============================================
# Proxmox infrastructure health check script
# ============================================
# Usage:   ./health-check.sh
# Cronjob: */5 * * * * /opt/scripts/health-check.sh >> /var/log/health-check.log 2>&1
#
# Prints one [OK]/[WARN]/[FAIL] line per check.
# Exit status: 0 when no check failed, 1 otherwise. [WARN] results are not
# counted as failures.

# No `-e` on purpose: a failing probe has to be reported and counted,
# it must not abort the remaining checks.
set -uo pipefail

# ANSI colour escapes for the status tags (interpreted by `echo -e`).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m'

# Number of failed checks; incremented by the check_* functions.
FAILED=0

#######################################
# Probe an HTTP endpoint and compare the returned status code.
# Globals:   FAILED (incremented on mismatch)
# Arguments: $1 - display name of the service
#            $2 - URL to request
#            $3 - expected HTTP status code (default: 200)
# Outputs:   one [OK] or [FAIL] line on stdout
#######################################
check_service() {
  local name=$1
  local url=$2
  local expected=${3:-200}
  local response

  # With -w '%{http_code}' curl itself prints 000 when it got no HTTP
  # response, so no `|| echo 000` fallback here (that yielded "000000").
  # --max-time bounds the whole transfer so a stalled endpoint cannot hang
  # the cron run; --connect-timeout alone only covers the connect phase.
  response=$(curl -s -o /dev/null -w '%{http_code}' \
    --connect-timeout 5 --max-time 10 "$url" 2>/dev/null)
  # Empty output means curl could not be executed at all.
  response=${response:-000}

  if [[ "$response" == "$expected" ]]; then
    echo -e "${GREEN}[OK]${NC} $name - HTTP $response"
  else
    echo -e "${RED}[FAIL]${NC} $name - HTTP $response (erwartet: $expected)"
    FAILED=$((FAILED + 1))
  fi
}

#######################################
# Check that a Docker container is running and report its health status.
# Globals:   FAILED (incremented when the container is not running)
# Arguments: $1 - exact container name
# Outputs:   one [OK], [WARN] or [FAIL] line on stdout
#######################################
check_container() {
  local name=$1
  local running status

  # Capture the list first instead of piping into `grep -q`: with pipefail,
  # grep exiting early can make `docker ps` die from SIGPIPE and turn a
  # running container into a false "Not running".
  running=$(docker ps --format '{{.Names}}')

  # -F/-x: match the name literally and as a whole line, not as a regex.
  if [[ -n "$running" ]] && grep -Fxq -- "$name" <<<"$running"; then
    # The Health field cannot be rendered for containers without a
    # healthcheck; a failing inspect is treated as "no-healthcheck".
    status=$(docker inspect --format='{{.State.Health.Status}}' "$name" 2>/dev/null) \
      || status="no-healthcheck"

    if [[ "$status" == "healthy" || "$status" == "no-healthcheck" ]]; then
      echo -e "${GREEN}[OK]${NC} Container $name - Running ($status)"
    else
      echo -e "${YELLOW}[WARN]${NC} Container $name - $status"
    fi
  else
    echo -e "${RED}[FAIL]${NC} Container $name - Not running"
    FAILED=$((FAILED + 1))
  fi
}

#######################################
# Check the usage of the filesystem that contains a path.
# Globals:   FAILED (incremented when usage >= threshold or unknown)
# Arguments: $1 - path on the filesystem to check
#            $2 - usage limit in percent (default: 90)
# Outputs:   one [OK] or [FAIL] line on stdout
#######################################
check_disk() {
  local path=$1
  local threshold=${2:-90}
  local usage

  # -P forces the POSIX format with one line per filesystem, so a long
  # device name cannot wrap the line and shift the capacity column.
  usage=$(df -P "$path" | awk 'NR == 2 { sub(/%/, "", $5); print $5 }')

  # df failed (e.g. path missing) or printed something unexpected.
  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    echo -e "${RED}[FAIL]${NC} Disk $path - Auslastung nicht ermittelbar"
    FAILED=$((FAILED + 1))
    return
  fi

  if ((usage < threshold)); then
    echo -e "${GREEN}[OK]${NC} Disk $path - ${usage}% verwendet"
  else
    echo -e "${RED}[FAIL]${NC} Disk $path - ${usage}% verwendet (Grenze: ${threshold}%)"
    FAILED=$((FAILED + 1))
  fi
}

#######################################
# Check that the WireGuard interface wg0 is up and had a recent handshake.
# Globals:   FAILED (incremented when wg0 is not active)
# Arguments: none
# Outputs:   one [OK], [WARN] or [FAIL] line on stdout
#######################################
check_wireguard() {
  local latest_handshake=0
  local current_time diff timestamp

  if ! wg show wg0 &>/dev/null; then
    echo -e "${RED}[FAIL]${NC} WireGuard - Nicht aktiv"
    FAILED=$((FAILED + 1))
    return
  fi

  # `latest-handshakes` prints one "<public-key> <epoch-seconds>" line per
  # peer. Keep the newest value so that several peers (or none) cannot
  # break the arithmetic below.
  while read -r _ timestamp; do
    if [[ "$timestamp" =~ ^[0-9]+$ ]] && ((timestamp > latest_handshake)); then
      latest_handshake=$timestamp
    fi
  done < <(wg show wg0 latest-handshakes)

  # Still 0: no peers configured or no peer has completed a handshake.
  if ((latest_handshake == 0)); then
    echo -e "${YELLOW}[WARN]${NC} WireGuard - Noch kein Handshake"
    return
  fi

  current_time=$(date +%s)
  diff=$((current_time - latest_handshake))

  if ((diff < 180)); then
    echo -e "${GREEN}[OK]${NC} WireGuard - Verbunden (Handshake vor ${diff}s)"
  else
    echo -e "${YELLOW}[WARN]${NC} WireGuard - Letzter Handshake vor ${diff}s"
  fi
}

#######################################
# Run all checks and print the summary.
# Globals:   FAILED (read)
# Returns:   0 when no check failed, 1 otherwise
#######################################
main() {
  echo "=========================================="
  echo " Proxmox Infrastruktur Health Check"
  echo " $(date '+%Y-%m-%d %H:%M:%S')"
  echo "=========================================="
  echo ""

  echo "--- Container Status ---"
  check_container "nextcloud"
  check_container "nextcloud-db"
  check_container "vaultwarden"
  check_container "n8n"
  check_container "audiobookshelf"
  check_container "websites"
  check_container "api-server"
  check_container "gitea"
  echo ""

  echo "--- Service Endpoints (Intern) ---"
  check_service "Nextcloud" "http://localhost:8081/status.php"
  check_service "Vaultwarden" "http://localhost:8083/alive"
  check_service "n8n" "http://localhost:5678/healthz"
  check_service "Websites" "http://localhost:8082/"
  check_service "API" "http://localhost:8000/health"
  check_service "Gitea" "http://localhost:3000/api/healthz"
  echo ""

  echo "--- Netzwerk ---"
  check_wireguard
  echo ""

  echo "--- Disk Usage ---"
  check_disk "/" 85
  check_disk "/opt/docker" 90
  echo ""

  echo "=========================================="
  if ((FAILED == 0)); then
    echo -e "${GREEN}Alle Checks bestanden!${NC}"
    return 0
  fi

  echo -e "${RED}$FAILED Check(s) fehlgeschlagen!${NC}"
  return 1
}

# main is the last command, so its return value is the script's exit status.
main "$@"